upload PaddleOCR code
This commit is contained in: parent bc93c549fe, commit 338ba3ee4a
@@ -0,0 +1,35 @@
- repo: https://github.com/PaddlePaddle/mirrors-yapf.git
  sha: 0d79c0c469bab64f7229c9aca2b1186ef47f0e37
  hooks:
    - id: yapf
      files: \.py$
- repo: https://github.com/pre-commit/pre-commit-hooks
  sha: a11d9314b22d8f8c7556443875b731ef05965464
  hooks:
    - id: check-merge-conflict
    - id: check-symlinks
    - id: detect-private-key
      files: (?!.*paddle)^.*$
    - id: end-of-file-fixer
      files: \.md$
    - id: trailing-whitespace
      files: \.md$
- repo: https://github.com/Lucas-C/pre-commit-hooks
  sha: v1.0.1
  hooks:
    - id: forbid-crlf
      files: \.md$
    - id: remove-crlf
      files: \.md$
    - id: forbid-tabs
      files: \.md$
    - id: remove-tabs
      files: \.md$
- repo: local
  hooks:
    - id: clang-format
      name: clang-format
      description: Format files with ClangFormat
      entry: bash .clang_format.hook -i
      language: system
      files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|cuh|proto)$
@@ -0,0 +1,3 @@
[style]
based_on_style = pep8
column_limit = 80
@@ -0,0 +1,22 @@
TrainReader:
  reader_function: ppocr.data.det.dataset_traversal,TrainReader
  process_function: ppocr.data.det.db_process,DBProcessTrain
  num_workers: 8
  img_set_dir: ./train_data/icdar2015/text_localization/
  label_file_path: ./train_data/icdar2015/text_localization/train_icdar2015_label.txt

EvalReader:
  reader_function: ppocr.data.det.dataset_traversal,EvalTestReader
  process_function: ppocr.data.det.db_process,DBProcessTest
  img_set_dir: ./train_data/icdar2015/text_localization/
  label_file_path: ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
  test_image_shape: [736, 1280]

TestReader:
  reader_function: ppocr.data.det.dataset_traversal,EvalTestReader
  process_function: ppocr.data.det.db_process,DBProcessTest
  single_img_path:
  img_set_dir: ./train_data/icdar2015/text_localization/
  label_file_path: ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
  test_image_shape: [736, 1280]
  do_eval: True
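A note on the label files these reader configs point at: each line of train_icdar2015_label.txt is an image path, a tab, and a JSON annotation, which is exactly how DBProcessTrain.convert_label_infor (further down in this commit) parses it. A hypothetical line and the split it goes through (path and content are illustrative, not taken from the dataset):

line = ('icdar_c4_train_imgs/img_1.jpg\t'
        '[{"points": [[377, 117], [463, 117], [463, 130], [377, 130]], '
        '"transcription": "Genaxis Theatre"}]')
img_name, annotation = line.strip("\n").split("\t")  # same split as convert_label_infor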
@@ -0,0 +1,51 @@
Global:
  algorithm: DB
  use_gpu: true
  epoch_num: 1200
  log_smooth_window: 20
  print_batch_step: 2
  save_model_dir: output
  save_epoch_step: 200
  eval_batch_step: 5000
  train_batch_size_per_card: 16
  test_batch_size_per_card: 16
  image_shape: [3, 640, 640]
  reader_yml: ./configs/det/det_db_icdar15_reader.yml
  pretrain_weights: ./pretrain_models/MobileNetV3_pretrained/MobileNetV3_large_x0_5_pretrained/
  save_res_path: ./output/predicts_db.txt

Architecture:
  function: ppocr.modeling.architectures.det_model,DetModel

Backbone:
  function: ppocr.modeling.backbones.det_mobilenet_v3,MobileNetV3
  scale: 0.5
  model_name: large

Head:
  function: ppocr.modeling.heads.det_db_head,DBHead
  model_name: large
  k: 50
  inner_channels: 96
  out_channels: 2

Loss:
  function: ppocr.modeling.losses.det_db_loss,DBLoss
  balance_loss: true
  main_loss_type: DiceLoss
  alpha: 5
  beta: 10
  ohem_ratio: 3

Optimizer:
  function: ppocr.optimizer,AdamDecay
  base_lr: 0.001
  beta1: 0.9
  beta2: 0.999

PostProcess:
  function: ppocr.postprocess.db_postprocess,DBPostProcess
  thresh: 0.3
  box_thresh: 0.7
  max_candidates: 1000
  unclip_ratio: 1.5
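The `function: module,Class` strings above (DetModel, MobileNetV3, DBHead, ...) are resolved at runtime; the reader code later in this commit calls create_module from ppocr.utils.utility for the same purpose. A minimal sketch of how such a resolver can work, assuming the real helper behaves like a dotted import plus getattr (its details may differ):

import importlib

def create_module(function_str):
    # "ppocr.modeling.heads.det_db_head,DBHead" -> the DBHead class object
    module_name, class_name = function_str.split(",")
    module = importlib.import_module(module_name)
    return getattr(module, class_name)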
@@ -0,0 +1,51 @@
Global:
  algorithm: DB
  use_gpu: true
  epoch_num: 1200
  log_smooth_window: 20
  print_batch_step: 2
  save_model_dir: output
  save_epoch_step: 200
  eval_batch_step: 5000
  train_batch_size_per_card: 8
  test_batch_size_per_card: 16
  image_shape: [3, 640, 640]
  reader_yml: ./configs/det/det_db_icdar15_reader.yml
  pretrain_weights: ./pretrain_models/ResNet50_vd_pretrained/
  save_res_path: ./output/predicts_db.txt

Architecture:
  function: ppocr.modeling.architectures.det_model,DetModel

Backbone:
  function: ppocr.modeling.backbones.det_resnet_vd,ResNet
  layers: 50

Head:
  function: ppocr.modeling.heads.det_db_head,DBHead
  model_name: large
  k: 50
  inner_channels: 256
  out_channels: 2

Loss:
  function: ppocr.modeling.losses.det_db_loss,DBLoss
  balance_loss: true
  main_loss_type: DiceLoss
  alpha: 5
  beta: 10
  ohem_ratio: 3

Optimizer:
  function: ppocr.optimizer,AdamDecay
  base_lr: 0.001
  beta1: 0.9
  beta2: 0.999

PostProcess:
  function: ppocr.postprocess.db_postprocess,DBPostProcess
  thresh: 0.3
  box_thresh: 0.7
  max_candidates: 1000
  unclip_ratio: 1.5
@@ -0,0 +1,23 @@
TrainReader:
  reader_function: ppocr.data.det.dataset_traversal,TrainReader
  process_function: ppocr.data.det.east_process,EASTProcessTrain
  num_workers: 8
  img_set_dir: ./train_data/icdar2015/text_localization/
  label_file_path: ./train_data/icdar2015/text_localization/train_icdar2015_label.txt
  background_ratio: 0.125
  min_crop_side_ratio: 0.1
  min_text_size: 10

EvalReader:
  reader_function: ppocr.data.det.dataset_traversal,EvalTestReader
  process_function: ppocr.data.det.east_process,EASTProcessTest
  img_set_dir: ./train_data/icdar2015/text_localization/
  label_file_path: ./train_data/icdar2015/text_localization/test_icdar2015_label.txt

TestReader:
  reader_function: ppocr.data.det.dataset_traversal,EvalTestReader
  process_function: ppocr.data.det.east_process,EASTProcessTest
  single_img_path:
  img_set_dir: ./train_data/icdar2015/text_localization/
  label_file_path: ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
  do_eval: True
@@ -0,0 +1,43 @@
Global:
  algorithm: EAST
  use_gpu: true
  epoch_num: 100000
  log_smooth_window: 20
  print_batch_step: 5
  save_model_dir: output
  save_epoch_step: 200
  eval_batch_step: 5000
  train_batch_size_per_card: 16
  test_batch_size_per_card: 16
  image_shape: [3, 512, 512]
  reader_yml: ./configs/det/det_east_icdar15_reader.yml
  pretrain_weights: ./pretrain_models/MobileNetV3_pretrained/MobileNetV3_large_x0_5_pretrained/
  save_res_path: ./output/predicts_east.txt

Architecture:
  function: ppocr.modeling.architectures.det_model,DetModel

Backbone:
  function: ppocr.modeling.backbones.det_mobilenet_v3,MobileNetV3
  scale: 0.5
  model_name: large

Head:
  function: ppocr.modeling.heads.det_east_head,EASTHead
  model_name: small

Loss:
  function: ppocr.modeling.losses.det_east_loss,EASTLoss

Optimizer:
  function: ppocr.optimizer,AdamDecay
  base_lr: 0.001
  beta1: 0.9
  beta2: 0.999

PostProcess:
  function: ppocr.postprocess.east_postprocess,EASTPostPocess
  score_thresh: 0.8
  cover_thresh: 0.1
  nms_thresh: 0.2
@@ -0,0 +1,42 @@
Global:
  algorithm: EAST
  use_gpu: true
  epoch_num: 100000
  log_smooth_window: 20
  print_batch_step: 5
  save_model_dir: output
  save_epoch_step: 200
  eval_batch_step: 5000
  train_batch_size_per_card: 8
  test_batch_size_per_card: 16
  image_shape: [3, 512, 512]
  reader_yml: ./configs/det/det_east_icdar15_reader.yml
  pretrain_weights: ./pretrain_models/ResNet50_vd_pretrained/
  save_res_path: ./output/predicts_east.txt

Architecture:
  function: ppocr.modeling.architectures.det_model,DetModel

Backbone:
  function: ppocr.modeling.backbones.det_resnet_vd,ResNet
  layers: 50

Head:
  function: ppocr.modeling.heads.det_east_head,EASTHead
  model_name: large

Loss:
  function: ppocr.modeling.losses.det_east_loss,EASTLoss

Optimizer:
  function: ppocr.optimizer,AdamDecay
  base_lr: 0.001
  beta1: 0.9
  beta2: 0.999

PostProcess:
  function: ppocr.postprocess.east_postprocess,EASTPostPocess
  score_thresh: 0.8
  cover_thresh: 0.1
  nms_thresh: 0.2
@@ -0,0 +1,12 @@
TrainReader:
  reader_function: ppocr.data.rec.dataset_traversal,LMDBReader
  num_workers: 8
  lmdb_sets_dir: ./train_data/data_lmdb_release/training/

EvalReader:
  reader_function: ppocr.data.rec.dataset_traversal,LMDBReader
  lmdb_sets_dir: ./train_data/data_lmdb_release/validation/

TestReader:
  reader_function: ppocr.data.rec.dataset_traversal,LMDBReader
  lmdb_sets_dir: ./train_data/data_lmdb_release/evaluation/
@@ -0,0 +1,42 @@
Global:
  algorithm: CRNN
  dataset: common
  use_gpu: true
  epoch_num: 300
  log_smooth_window: 20
  print_batch_step: 10
  save_model_dir: output
  save_epoch_step: 3
  eval_batch_step: 2000
  train_batch_size_per_card: 256
  test_batch_size_per_card: 256
  image_shape: [3, 32, 100]
  max_text_length: 25
  character_type: ch
  character_dict_path: ./ppocr/utils/ppocr_keys_v1.txt
  loss_type: ctc
  reader_yml: ./configs/rec/rec_chinese_reader.yml
  pretrain_weights:

Architecture:
  function: ppocr.modeling.architectures.rec_model,RecModel

Backbone:
  function: ppocr.modeling.backbones.rec_mobilenet_v3,MobileNetV3
  scale: 0.5
  model_name: small

Head:
  function: ppocr.modeling.heads.rec_ctc_head,CTCPredict
  encoder_type: rnn
  SeqRNN:
    hidden_size: 48

Loss:
  function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss

Optimizer:
  function: ppocr.optimizer,AdamDecay
  base_lr: 0.001
  beta1: 0.9
  beta2: 0.999
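For a quick sanity check, a config like this one can simply be loaded as YAML; `image_shape: [3, 32, 100]` reads as channels, height, width. A small sketch (PyYAML assumed available; the file path is illustrative):

import yaml

with open("configs/rec/rec_chinese_lite_train.yml") as f:  # hypothetical path
    cfg = yaml.safe_load(f)
print(cfg["Global"]["image_shape"])  # [3, 32, 100] = C, H, W
print(cfg["Head"]["SeqRNN"]["hidden_size"])  # 48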
@@ -0,0 +1,14 @@
TrainReader:
  reader_function: ppocr.data.rec.dataset_traversal,SimpleReader
  num_workers: 8
  img_set_dir: .
  label_file_path: ./train_data/hard_label.txt

EvalReader:
  reader_function: ppocr.data.rec.dataset_traversal,SimpleReader
  img_set_dir: .
  label_file_path: ./train_data/label_val_all.txt

TestReader:
  reader_function: ppocr.data.rec.dataset_traversal,SimpleReader
  infer_img: ./infer_img
@@ -0,0 +1,40 @@
Global:
  algorithm: CRNN
  use_gpu: true
  epoch_num: 72
  log_smooth_window: 20
  print_batch_step: 10
  save_model_dir: output
  save_epoch_step: 3
  eval_batch_step: 2000
  train_batch_size_per_card: 256
  test_batch_size_per_card: 256
  image_shape: [3, 32, 100]
  max_text_length: 25
  character_type: en
  loss_type: ctc
  reader_yml: ./configs/rec/rec_benchmark_reader.yml
  pretrain_weights:

Architecture:
  function: ppocr.modeling.architectures.rec_model,RecModel

Backbone:
  function: ppocr.modeling.backbones.rec_mobilenet_v3,MobileNetV3
  scale: 0.5
  model_name: large

Head:
  function: ppocr.modeling.heads.rec_ctc_head,CTCPredict
  encoder_type: rnn
  SeqRNN:
    hidden_size: 96

Loss:
  function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss

Optimizer:
  function: ppocr.optimizer,AdamDecay
  base_lr: 0.001
  beta1: 0.9
  beta2: 0.999
@@ -0,0 +1,38 @@
Global:
  algorithm: Rosetta
  use_gpu: true
  epoch_num: 72
  log_smooth_window: 20
  print_batch_step: 10
  save_model_dir: output
  save_epoch_step: 3
  eval_batch_step: 2000
  train_batch_size_per_card: 256
  test_batch_size_per_card: 256
  image_shape: [3, 32, 100]
  max_text_length: 25
  character_type: en
  loss_type: ctc
  reader_yml: ./configs/rec/rec_benchmark_reader.yml
  pretrain_weights:

Architecture:
  function: ppocr.modeling.architectures.rec_model,RecModel

Backbone:
  function: ppocr.modeling.backbones.rec_mobilenet_v3,MobileNetV3
  scale: 0.5
  model_name: large

Head:
  function: ppocr.modeling.heads.rec_ctc_head,CTCPredict
  encoder_type: reshape

Loss:
  function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss

Optimizer:
  function: ppocr.optimizer,AdamDecay
  base_lr: 0.001
  beta1: 0.9
  beta2: 0.999
@@ -0,0 +1,49 @@
Global:
  algorithm: RARE
  use_gpu: true
  epoch_num: 72
  log_smooth_window: 20
  print_batch_step: 10
  save_model_dir: output
  save_epoch_step: 3
  eval_batch_step: 2000
  train_batch_size_per_card: 256
  test_batch_size_per_card: 256
  image_shape: [3, 32, 100]
  max_text_length: 25
  character_type: en
  loss_type: attention
  reader_yml: ./configs/rec/rec_benchmark_reader.yml
  pretrain_weights:

Architecture:
  function: ppocr.modeling.architectures.rec_model,RecModel

TPS:
  function: ppocr.modeling.stns.tps,TPS
  num_fiducial: 20
  loc_lr: 0.1
  model_name: small

Backbone:
  function: ppocr.modeling.backbones.rec_mobilenet_v3,MobileNetV3
  scale: 0.5
  model_name: large

Head:
  function: ppocr.modeling.heads.rec_attention_head,AttentionPredict
  encoder_type: rnn
  SeqRNN:
    hidden_size: 96
  Attention:
    decoder_size: 96
    word_vector_dim: 96

Loss:
  function: ppocr.modeling.losses.rec_attention_loss,AttentionLoss

Optimizer:
  function: ppocr.optimizer,AdamDecay
  base_lr: 0.001
  beta1: 0.9
  beta2: 0.999
@@ -0,0 +1,46 @@
Global:
  algorithm: STARNet
  use_gpu: true
  epoch_num: 72
  log_smooth_window: 20
  print_batch_step: 10
  save_model_dir: output
  save_epoch_step: 3
  eval_batch_step: 2000
  train_batch_size_per_card: 256
  test_batch_size_per_card: 256
  image_shape: [3, 32, 100]
  max_text_length: 25
  character_type: en
  loss_type: ctc
  reader_yml: ./configs/rec/rec_benchmark_reader.yml
  pretrain_weights:

Architecture:
  function: ppocr.modeling.architectures.rec_model,RecModel

TPS:
  function: ppocr.modeling.stns.tps,TPS
  num_fiducial: 20
  loc_lr: 0.1
  model_name: small

Backbone:
  function: ppocr.modeling.backbones.rec_mobilenet_v3,MobileNetV3
  scale: 0.5
  model_name: large

Head:
  function: ppocr.modeling.heads.rec_ctc_head,CTCPredict
  encoder_type: rnn
  SeqRNN:
    hidden_size: 96

Loss:
  function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss

Optimizer:
  function: ppocr.optimizer,AdamDecay
  base_lr: 0.001
  beta1: 0.9
  beta2: 0.999
@@ -0,0 +1,39 @@
Global:
  algorithm: CRNN
  use_gpu: true
  epoch_num: 72
  log_smooth_window: 20
  print_batch_step: 10
  save_model_dir: output
  save_epoch_step: 3
  eval_batch_step: 2000
  train_batch_size_per_card: 256
  test_batch_size_per_card: 256
  image_shape: [3, 32, 100]
  max_text_length: 25
  character_type: en
  loss_type: ctc
  reader_yml: ./configs/rec/rec_benchmark_reader.yml
  pretrain_weights:

Architecture:
  function: ppocr.modeling.architectures.rec_model,RecModel

Backbone:
  function: ppocr.modeling.backbones.rec_resnet_vd,ResNet
  layers: 34

Head:
  function: ppocr.modeling.heads.rec_ctc_head,CTCPredict
  encoder_type: rnn
  SeqRNN:
    hidden_size: 256

Loss:
  function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss

Optimizer:
  function: ppocr.optimizer,AdamDecay
  base_lr: 0.001
  beta1: 0.9
  beta2: 0.999
@@ -0,0 +1,37 @@
Global:
  algorithm: Rosetta
  use_gpu: true
  epoch_num: 72
  log_smooth_window: 20
  print_batch_step: 10
  save_model_dir: output
  save_epoch_step: 3
  eval_batch_step: 2000
  train_batch_size_per_card: 256
  test_batch_size_per_card: 256
  image_shape: [3, 32, 100]
  max_text_length: 25
  character_type: en
  loss_type: ctc
  reader_yml: ./configs/rec/rec_benchmark_reader.yml
  pretrain_weights:

Architecture:
  function: ppocr.modeling.architectures.rec_model,RecModel

Backbone:
  function: ppocr.modeling.backbones.rec_resnet_vd,ResNet
  layers: 34

Head:
  function: ppocr.modeling.heads.rec_ctc_head,CTCPredict
  encoder_type: reshape

Loss:
  function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss

Optimizer:
  function: ppocr.optimizer,AdamDecay
  base_lr: 0.001
  beta1: 0.9
  beta2: 0.999
@@ -0,0 +1,48 @@
Global:
  algorithm: RARE
  use_gpu: true
  epoch_num: 72
  log_smooth_window: 20
  print_batch_step: 10
  save_model_dir: output
  save_epoch_step: 3
  eval_batch_step: 2000
  train_batch_size_per_card: 256
  test_batch_size_per_card: 256
  image_shape: [3, 32, 100]
  max_text_length: 25
  character_type: en
  loss_type: attention
  reader_yml: ./configs/rec/rec_benchmark_reader.yml
  pretrain_weights:

Architecture:
  function: ppocr.modeling.architectures.rec_model,RecModel

TPS:
  function: ppocr.modeling.stns.tps,TPS
  num_fiducial: 20
  loc_lr: 0.1
  model_name: large

Backbone:
  function: ppocr.modeling.backbones.rec_resnet_vd,ResNet
  layers: 34

Head:
  function: ppocr.modeling.heads.rec_attention_head,AttentionPredict
  encoder_type: rnn
  SeqRNN:
    hidden_size: 256
  Attention:
    decoder_size: 128
    word_vector_dim: 128

Loss:
  function: ppocr.modeling.losses.rec_attention_loss,AttentionLoss

Optimizer:
  function: ppocr.optimizer,AdamDecay
  base_lr: 0.001
  beta1: 0.9
  beta2: 0.999
@@ -0,0 +1,45 @@
Global:
  algorithm: STARNet
  use_gpu: true
  epoch_num: 72
  log_smooth_window: 20
  print_batch_step: 10
  save_model_dir: output
  save_epoch_step: 3
  eval_batch_step: 2000
  train_batch_size_per_card: 256
  test_batch_size_per_card: 256
  image_shape: [3, 32, 100]
  max_text_length: 25
  character_type: en
  loss_type: ctc
  reader_yml: ./configs/rec/rec_benchmark_reader.yml
  pretrain_weights:

Architecture:
  function: ppocr.modeling.architectures.rec_model,RecModel

TPS:
  function: ppocr.modeling.stns.tps,TPS
  num_fiducial: 20
  loc_lr: 0.1
  model_name: large

Backbone:
  function: ppocr.modeling.backbones.rec_resnet_vd,ResNet
  layers: 34

Head:
  function: ppocr.modeling.heads.rec_ctc_head,CTCPredict
  encoder_type: rnn
  SeqRNN:
    hidden_size: 256

Loss:
  function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss

Optimizer:
  function: ppocr.optimizer,AdamDecay
  base_lr: 0.001
  beta1: 0.9
  beta2: 0.999
@@ -0,0 +1,13 @@
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
@@ -0,0 +1,13 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
@@ -0,0 +1,47 @@
# -*- coding:utf-8 -*-

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import numpy as np
import random
import cv2
import math

import imgaug
import imgaug.augmenters as iaa


def AugmentData(data):
    img = data['image']
    shape = img.shape

    aug = iaa.Sequential(
        [iaa.Fliplr(0.5), iaa.Affine(rotate=(-10, 10)), iaa.Resize(
            (0.5, 3))]).to_deterministic()

    def may_augment_annotation(aug, data, shape):
        if aug is None:
            return data

        line_polys = []
        for poly in data['polys']:
            new_poly = may_augment_poly(aug, shape, poly)
            line_polys.append(new_poly)
        data['polys'] = np.array(line_polys)
        return data

    def may_augment_poly(aug, img_shape, poly):
        keypoints = [imgaug.Keypoint(p[0], p[1]) for p in poly]
        keypoints = aug.augment_keypoints(
            [imgaug.KeypointsOnImage(
                keypoints, shape=img_shape)])[0].keypoints
        poly = [(p.x, p.y) for p in keypoints]
        return poly

    img_aug = aug.augment_image(img)
    data['image'] = img_aug
    data = may_augment_annotation(aug, data, shape)
    return data
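The to_deterministic() call above is what keeps the image and its polygons consistent: the same frozen augmenter is applied to pixels and keypoints. A tiny check of that idea, assuming an imgaug version with the list-based augment_keypoints API used in this file:

import numpy as np
import imgaug
import imgaug.augmenters as iaa

img = np.zeros((100, 100, 3), dtype=np.uint8)
aug = iaa.Sequential([iaa.Fliplr(1.0)]).to_deterministic()  # always flip
kps = imgaug.KeypointsOnImage([imgaug.Keypoint(10, 20)], shape=img.shape)
flipped = aug.augment_keypoints([kps])[0].keypoints[0]
print(flipped.x, flipped.y)  # x mirrored to the right side (~90), y unchanged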
@@ -0,0 +1,110 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import math
import random
import functools
import numpy as np
import cv2
import string
from ppocr.utils.utility import initial_logger
logger = initial_logger()
from ppocr.utils.utility import create_module
import time


class TrainReader(object):
    def __init__(self, params):
        self.num_workers = params['num_workers']
        self.label_file_path = params['label_file_path']
        self.batch_size = params['train_batch_size_per_card']
        assert 'process_function' in params,\
            "process_function is missing in Reader params"
        self.process = create_module(params['process_function'])(params)

    def __call__(self, process_id):
        def sample_iter_reader():
            with open(self.label_file_path, "rb") as fin:
                label_infor_list = fin.readlines()
            img_num = len(label_infor_list)
            img_id_list = list(range(img_num))
            random.shuffle(img_id_list)
            for img_id in range(process_id, img_num, self.num_workers):
                label_infor = label_infor_list[img_id_list[img_id]]
                outs = self.process(label_infor)
                if outs is None:
                    continue
                yield outs

        def batch_iter_reader():
            batch_outs = []
            for outs in sample_iter_reader():
                batch_outs.append(outs)
                if len(batch_outs) == self.batch_size:
                    yield batch_outs
                    batch_outs = []
            if len(batch_outs) != 0:
                yield batch_outs

        return batch_iter_reader


class EvalTestReader(object):
    def __init__(self, params):
        self.params = params
        assert 'process_function' in params,\
            "process_function is missing in EvalTestReader params"

    def __call__(self, mode):
        process_function = create_module(self.params['process_function'])(
            self.params)
        batch_size = self.params['test_batch_size_per_card']

        flag_test_single_img = False
        if mode == "test":
            single_img_path = self.params['single_img_path']
            if single_img_path is not None:
                flag_test_single_img = True

        img_list = []
        if flag_test_single_img:
            img_list.append([single_img_path, single_img_path])
        else:
            img_set_dir = self.params['img_set_dir']
            img_name_list_path = self.params['label_file_path']
            with open(img_name_list_path, "rb") as fin:
                lines = fin.readlines()
                for line in lines:
                    img_name = line.decode().strip("\n").split("\t")[0]
                    img_path = img_set_dir + "/" + img_name
                    img_list.append([img_path, img_name])

        def batch_iter_reader():
            batch_outs = []
            for img_path, img_name in img_list:
                img = cv2.imread(img_path)
                if img is None:
                    logger.info("load image error:" + img_path)
                    continue
                outs = process_function(img)
                outs.append(img_name)
                batch_outs.append(outs)
                if len(batch_outs) == batch_size:
                    yield batch_outs
                    batch_outs = []
            if len(batch_outs) != 0:
                yield batch_outs

        return batch_iter_reader
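How TrainReader is meant to be consumed is implicit in __call__: each worker builds the same params and passes its own process_id, so the workers shard the shuffled index list among themselves. A sketch, with the params dict mirroring the TrainReader section of the DB reader YAML above:

params = {
    'num_workers': 8,
    'label_file_path': './train_data/icdar2015/text_localization/train_icdar2015_label.txt',
    'train_batch_size_per_card': 16,
    'process_function': 'ppocr.data.det.db_process,DBProcessTrain',
    # ...plus the keys DBProcessTrain itself reads (img_set_dir, image_shape, ...)
}
reader = TrainReader(params)
batch_iter = reader(process_id=0)  # worker 0 of 8; process_id strides the shuffled ids
for batch in batch_iter():
    break  # each batch is a list of per-sample tuples produced by the process function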
@@ -0,0 +1,192 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import math
import cv2
import numpy as np
import json
import sys

from .data_augment import AugmentData
from .random_crop_data import RandomCropData
from .make_shrink_map import MakeShrinkMap
from .make_border_map import MakeBorderMap


class DBProcessTrain(object):
    def __init__(self, params):
        self.img_set_dir = params['img_set_dir']
        self.image_shape = params['image_shape']

    def order_points_clockwise(self, pts):
        rect = np.zeros((4, 2), dtype="float32")
        s = pts.sum(axis=1)
        rect[0] = pts[np.argmin(s)]
        rect[2] = pts[np.argmax(s)]
        diff = np.diff(pts, axis=1)
        rect[1] = pts[np.argmin(diff)]
        rect[3] = pts[np.argmax(diff)]
        return rect

    def make_data_dict(self, imgvalue, entry):
        boxes = []
        texts = []
        ignores = []
        for rect in entry:
            points = rect['points']
            transcription = rect['transcription']
            try:
                box = self.order_points_clockwise(
                    np.array(points).reshape(-1, 2))
                if cv2.contourArea(box) > 0:
                    boxes.append(box)
                    texts.append(transcription)
                    ignores.append(transcription in ['*', '###'])
            except:
                print('load label failed!')
        data = {
            'image': imgvalue,
            'shape': [imgvalue.shape[0], imgvalue.shape[1]],
            'polys': np.array(boxes),
            'texts': texts,
            'ignore_tags': ignores,
        }
        return data

    def NormalizeImage(self, data):
        im = data['image']
        img_mean = [0.485, 0.456, 0.406]
        img_std = [0.229, 0.224, 0.225]
        im = im.astype(np.float32, copy=False)
        im = im / 255
        im -= img_mean
        im /= img_std
        channel_swap = (2, 0, 1)
        im = im.transpose(channel_swap)
        data['image'] = im
        return data

    def FilterKeys(self, data):
        filter_keys = ['polys', 'texts', 'ignore_tags', 'shape']
        for key in filter_keys:
            if key in data:
                del data[key]
        return data

    def convert_label_infor(self, label_infor):
        label_infor = label_infor.decode()
        label_infor = label_infor.encode('utf-8').decode('utf-8-sig')
        substr = label_infor.strip("\n").split("\t")
        img_path = self.img_set_dir + substr[0]
        label = json.loads(substr[1])
        return img_path, label

    def __call__(self, label_infor):
        img_path, gt_label = self.convert_label_infor(label_infor)
        imgvalue = cv2.imread(img_path)
        if imgvalue is None:
            return None
        data = self.make_data_dict(imgvalue, gt_label)
        data = AugmentData(data)
        data = RandomCropData(data, self.image_shape[1:])
        data = MakeShrinkMap(data)
        data = MakeBorderMap(data)
        data = self.NormalizeImage(data)
        data = self.FilterKeys(data)
        return data['image'], data['shrink_map'], data['shrink_mask'], data[
            'threshold_map'], data['threshold_mask']


class DBProcessTest(object):
    def __init__(self, params):
        super(DBProcessTest, self).__init__()
        self.resize_type = 0
        if 'det_image_shape' in params:
            self.image_shape = params['det_image_shape']
            # print(self.image_shape)
            self.resize_type = 1
        if 'max_side_len' in params:
            self.max_side_len = params['max_side_len']
        else:
            self.max_side_len = 2400

    def resize_image_type0(self, im):
        """
        resize image to a size multiple of 32 which is required by the network
        :param im: the image to be resized
        :param max_side_len: limit of max image size to avoid out of memory in gpu
        :return: the resized image and the resize ratio
        """
        max_side_len = self.max_side_len
        h, w, _ = im.shape

        resize_w = w
        resize_h = h

        # limit the max side
        if max(resize_h, resize_w) > max_side_len:
            if resize_h > resize_w:
                ratio = float(max_side_len) / resize_h
            else:
                ratio = float(max_side_len) / resize_w
        else:
            ratio = 1.
        resize_h = int(resize_h * ratio)
        resize_w = int(resize_w * ratio)
        if resize_h % 32 != 0:
            resize_h = (resize_h // 32 + 1) * 32
        if resize_w % 32 != 0:
            resize_w = (resize_w // 32 + 1) * 32
        try:
            if int(resize_w) <= 0 or int(resize_h) <= 0:
                return None, (None, None)
            im = cv2.resize(im, (int(resize_w), int(resize_h)))
        except:
            print(im.shape, resize_w, resize_h)
            sys.exit(0)
        ratio_h = resize_h / float(h)
        ratio_w = resize_w / float(w)
        return im, (ratio_h, ratio_w)

    def resize_image_type1(self, im):
        resize_h, resize_w = self.image_shape
        ori_h, ori_w = im.shape[:2]  # (h, w, c)
        im = cv2.resize(im, (int(resize_w), int(resize_h)))
        ratio_h = float(resize_h) / ori_h
        ratio_w = float(resize_w) / ori_w
        return im, (ratio_h, ratio_w)

    def normalize(self, im):
        img_mean = [0.485, 0.456, 0.406]
        img_std = [0.229, 0.224, 0.225]
        im = im.astype(np.float32, copy=False)
        im = im / 255
        im -= img_mean
        im /= img_std
        channel_swap = (2, 0, 1)
        im = im.transpose(channel_swap)
        return im

    def __call__(self, im):
        if self.resize_type == 0:
            im, (ratio_h, ratio_w) = self.resize_image_type0(im)
        else:
            im, (ratio_h, ratio_w) = self.resize_image_type1(im)
        im = self.normalize(im)
        im = im[np.newaxis, :]
        return [im, (ratio_h, ratio_w)]
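resize_image_type0 rounds each side up to the next multiple of 32 after the max_side_len cap. A worked instance of that arithmetic:

# 720 x 1300 with max_side_len=2400: ratio stays 1.0, then round UP to x32.
h, w = 720, 1300
resize_h = h if h % 32 == 0 else (h // 32 + 1) * 32  # 736
resize_w = w if w % 32 == 0 else (w // 32 + 1) * 32  # 1312
print(resize_h, resize_w)  # the ratios returned are 736/720 and 1312/1300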
@@ -0,0 +1,509 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import math
import cv2
import numpy as np
import json


class EASTProcessTrain(object):
    def __init__(self, params):
        self.img_set_dir = params['img_set_dir']
        self.random_scale = np.array([0.5, 1, 2.0, 3.0])
        self.background_ratio = params['background_ratio']
        self.min_crop_side_ratio = params['min_crop_side_ratio']
        image_shape = params['image_shape']
        self.input_size = image_shape[1]
        self.min_text_size = params['min_text_size']

    def preprocess(self, im):
        input_size = self.input_size
        im_shape = im.shape
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])
        im_scale = float(input_size) / float(im_size_max)
        im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale)
        img_mean = [0.485, 0.456, 0.406]
        img_std = [0.229, 0.224, 0.225]
        im = im[:, :, ::-1].astype(np.float32)
        im = im / 255
        im -= img_mean
        im /= img_std
        new_h, new_w, _ = im.shape
        im_padded = np.zeros((input_size, input_size, 3), dtype=np.float32)
        im_padded[:new_h, :new_w, :] = im
        im_padded = im_padded.transpose((2, 0, 1))
        im_padded = im_padded[np.newaxis, :]
        return im_padded, im_scale

    def convert_label_infor(self, label_infor):
        label_infor = label_infor.decode()
        label_infor = label_infor.encode('utf-8').decode('utf-8-sig')
        substr = label_infor.strip("\n").split("\t")
        img_path = self.img_set_dir + substr[0]
        label = json.loads(substr[1])
        nBox = len(label)
        wordBBs, txts, txt_tags = [], [], []
        for bno in range(0, nBox):
            wordBB = label[bno]['points']
            txt = label[bno]['transcription']
            wordBBs.append(wordBB)
            txts.append(txt)
            if txt == '###':
                txt_tags.append(True)
            else:
                txt_tags.append(False)
        wordBBs = np.array(wordBBs, dtype=np.float32)
        txt_tags = np.array(txt_tags, dtype=np.bool)
        return img_path, wordBBs, txt_tags, txts

    def rotate_im_poly(self, im, text_polys):
        """
        rotate the image and its polygons by 90 / 180 / 270 degrees
        """
        im_w, im_h = im.shape[1], im.shape[0]
        dst_im = im.copy()
        dst_polys = []
        rand_degree_ratio = np.random.rand()
        rand_degree_cnt = 1
        if rand_degree_ratio > 0.333 and rand_degree_ratio < 0.666:
            rand_degree_cnt = 2
        elif rand_degree_ratio > 0.666:
            rand_degree_cnt = 3
        for i in range(rand_degree_cnt):
            dst_im = np.rot90(dst_im)
        rot_degree = -90 * rand_degree_cnt
        rot_angle = rot_degree * math.pi / 180.0
        n_poly = text_polys.shape[0]
        cx, cy = 0.5 * im_w, 0.5 * im_h
        ncx, ncy = 0.5 * dst_im.shape[1], 0.5 * dst_im.shape[0]
        for i in range(n_poly):
            wordBB = text_polys[i]
            poly = []
            for j in range(4):
                sx, sy = wordBB[j][0], wordBB[j][1]
                dx = math.cos(rot_angle) * (sx - cx)\
                    - math.sin(rot_angle) * (sy - cy) + ncx
                dy = math.sin(rot_angle) * (sx - cx)\
                    + math.cos(rot_angle) * (sy - cy) + ncy
                poly.append([dx, dy])
            dst_polys.append(poly)
        dst_polys = np.array(dst_polys, dtype=np.float32)
        return dst_im, dst_polys

    def polygon_area(self, poly):
        """
        compute the signed area of a polygon
        :param poly: 4x2 array of vertices
        :return: signed area (the sign encodes the vertex order)
        """
        edge = [(poly[1][0] - poly[0][0]) * (poly[1][1] + poly[0][1]),
                (poly[2][0] - poly[1][0]) * (poly[2][1] + poly[1][1]),
                (poly[3][0] - poly[2][0]) * (poly[3][1] + poly[2][1]),
                (poly[0][0] - poly[3][0]) * (poly[0][1] + poly[3][1])]
        return np.sum(edge) / 2.

    def check_and_validate_polys(self, polys, tags, img_height, img_width):
        """
        check that the text polys all have the same vertex direction,
        and filter out invalid polygons
        :param polys: text polygons
        :param tags: ignore tags
        :return: validated polygons and tags
        """
        h, w = img_height, img_width
        if polys.shape[0] == 0:
            return polys
        polys[:, :, 0] = np.clip(polys[:, :, 0], 0, w - 1)
        polys[:, :, 1] = np.clip(polys[:, :, 1], 0, h - 1)

        validated_polys = []
        validated_tags = []
        for poly, tag in zip(polys, tags):
            p_area = self.polygon_area(poly)
            # invalid poly
            if abs(p_area) < 1:
                continue
            if p_area > 0:
                # poly in wrong direction
                if not tag:
                    tag = True  # reversed cases should be ignored
                poly = poly[(0, 3, 2, 1), :]
            validated_polys.append(poly)
            validated_tags.append(tag)
        return np.array(validated_polys), np.array(validated_tags)

    def draw_img_polys(self, img, polys):
        if len(img.shape) == 4:
            img = np.squeeze(img, axis=0)
        if img.shape[0] == 3:
            img = img.transpose((1, 2, 0))
            img[:, :, 2] += 123.68
            img[:, :, 1] += 116.78
            img[:, :, 0] += 103.94
        cv2.imwrite("tmp.jpg", img)
        img = cv2.imread("tmp.jpg")
        for box in polys:
            box = box.astype(np.int32).reshape((-1, 1, 2))
            cv2.polylines(img, [box], True, color=(255, 255, 0), thickness=2)
        import random
        ino = random.randint(0, 100)
        cv2.imwrite("tmp_%d.jpg" % ino, img)
        return

    def shrink_poly(self, poly, r):
        """
        fit a poly inside the origin poly, maybe bugs here...
        used for generating the score map
        :param poly: the text poly
        :param r: r in the paper
        :return: the shrunk poly
        """
        # shrink ratio
        R = 0.3
        # find the longer pair
        dist0 = np.linalg.norm(poly[0] - poly[1])
        dist1 = np.linalg.norm(poly[2] - poly[3])
        dist2 = np.linalg.norm(poly[0] - poly[3])
        dist3 = np.linalg.norm(poly[1] - poly[2])
        if dist0 + dist1 > dist2 + dist3:
            # first move (p0, p1), (p2, p3), then (p0, p3), (p1, p2)
            ## p0, p1
            theta = np.arctan2((poly[1][1] - poly[0][1]),
                               (poly[1][0] - poly[0][0]))
            poly[0][0] += R * r[0] * np.cos(theta)
            poly[0][1] += R * r[0] * np.sin(theta)
            poly[1][0] -= R * r[1] * np.cos(theta)
            poly[1][1] -= R * r[1] * np.sin(theta)
            ## p2, p3
            theta = np.arctan2((poly[2][1] - poly[3][1]),
                               (poly[2][0] - poly[3][0]))
            poly[3][0] += R * r[3] * np.cos(theta)
            poly[3][1] += R * r[3] * np.sin(theta)
            poly[2][0] -= R * r[2] * np.cos(theta)
            poly[2][1] -= R * r[2] * np.sin(theta)
            ## p0, p3
            theta = np.arctan2((poly[3][0] - poly[0][0]),
                               (poly[3][1] - poly[0][1]))
            poly[0][0] += R * r[0] * np.sin(theta)
            poly[0][1] += R * r[0] * np.cos(theta)
            poly[3][0] -= R * r[3] * np.sin(theta)
            poly[3][1] -= R * r[3] * np.cos(theta)
            ## p1, p2
            theta = np.arctan2((poly[2][0] - poly[1][0]),
                               (poly[2][1] - poly[1][1]))
            poly[1][0] += R * r[1] * np.sin(theta)
            poly[1][1] += R * r[1] * np.cos(theta)
            poly[2][0] -= R * r[2] * np.sin(theta)
            poly[2][1] -= R * r[2] * np.cos(theta)
        else:
            ## p0, p3
            theta = np.arctan2((poly[3][0] - poly[0][0]),
                               (poly[3][1] - poly[0][1]))
            poly[0][0] += R * r[0] * np.sin(theta)
            poly[0][1] += R * r[0] * np.cos(theta)
            poly[3][0] -= R * r[3] * np.sin(theta)
            poly[3][1] -= R * r[3] * np.cos(theta)
            ## p1, p2
            theta = np.arctan2((poly[2][0] - poly[1][0]),
                               (poly[2][1] - poly[1][1]))
            poly[1][0] += R * r[1] * np.sin(theta)
            poly[1][1] += R * r[1] * np.cos(theta)
            poly[2][0] -= R * r[2] * np.sin(theta)
            poly[2][1] -= R * r[2] * np.cos(theta)
            ## p0, p1
            theta = np.arctan2((poly[1][1] - poly[0][1]),
                               (poly[1][0] - poly[0][0]))
            poly[0][0] += R * r[0] * np.cos(theta)
            poly[0][1] += R * r[0] * np.sin(theta)
            poly[1][0] -= R * r[1] * np.cos(theta)
            poly[1][1] -= R * r[1] * np.sin(theta)
            ## p2, p3
            theta = np.arctan2((poly[2][1] - poly[3][1]),
                               (poly[2][0] - poly[3][0]))
            poly[3][0] += R * r[3] * np.cos(theta)
            poly[3][1] += R * r[3] * np.sin(theta)
            poly[2][0] -= R * r[2] * np.cos(theta)
            poly[2][1] -= R * r[2] * np.sin(theta)
        return poly

    def generate_quad(self, im_size, polys, tags):
        """
        Generate quadrangle.
        """
        h, w = im_size
        poly_mask = np.zeros((h, w), dtype=np.uint8)
        score_map = np.zeros((h, w), dtype=np.uint8)
        # (x1, y1, ..., x4, y4, short_edge_norm)
        geo_map = np.zeros((h, w, 9), dtype=np.float32)
        # mask used during training, to ignore some hard areas
        training_mask = np.ones((h, w), dtype=np.uint8)
        for poly_idx, poly_tag in enumerate(zip(polys, tags)):
            poly = poly_tag[0]
            tag = poly_tag[1]

            r = [None, None, None, None]
            for i in range(4):
                dist1 = np.linalg.norm(poly[i] - poly[(i + 1) % 4])
                dist2 = np.linalg.norm(poly[i] - poly[(i - 1) % 4])
                r[i] = min(dist1, dist2)
            # score map
            shrinked_poly = self.shrink_poly(
                poly.copy(), r).astype(np.int32)[np.newaxis, :, :]
            cv2.fillPoly(score_map, shrinked_poly, 1)
            cv2.fillPoly(poly_mask, shrinked_poly, poly_idx + 1)
            # if the poly is too small, then ignore it during training
            poly_h = min(
                np.linalg.norm(poly[0] - poly[3]),
                np.linalg.norm(poly[1] - poly[2]))
            poly_w = min(
                np.linalg.norm(poly[0] - poly[1]),
                np.linalg.norm(poly[2] - poly[3]))
            if min(poly_h, poly_w) < self.min_text_size:
                cv2.fillPoly(training_mask,
                             poly.astype(np.int32)[np.newaxis, :, :], 0)

            if tag:
                cv2.fillPoly(training_mask,
                             poly.astype(np.int32)[np.newaxis, :, :], 0)

            xy_in_poly = np.argwhere(poly_mask == (poly_idx + 1))
            # geo map
            y_in_poly = xy_in_poly[:, 0]
            x_in_poly = xy_in_poly[:, 1]
            poly[:, 0] = np.minimum(np.maximum(poly[:, 0], 0), w)
            poly[:, 1] = np.minimum(np.maximum(poly[:, 1], 0), h)
            for pno in range(4):
                geo_channel_beg = pno * 2
                geo_map[y_in_poly, x_in_poly, geo_channel_beg] =\
                    x_in_poly - poly[pno, 0]
                geo_map[y_in_poly, x_in_poly, geo_channel_beg + 1] =\
                    y_in_poly - poly[pno, 1]
            geo_map[y_in_poly, x_in_poly, 8] = \
                1.0 / max(min(poly_h, poly_w), 1.0)
        return score_map, geo_map, training_mask

    def crop_area(self,
                  im,
                  polys,
                  tags,
                  txts,
                  crop_background=False,
                  max_tries=50):
        """
        make a random crop from the input image
        :param im: input image
        :param polys: text polygons
        :param tags: ignore tags
        :param crop_background: whether to crop a text-free region
        :param max_tries: maximum number of sampling attempts
        :return: cropped image, polys, tags and texts
        """
        h, w, _ = im.shape
        pad_h = h // 10
        pad_w = w // 10
        h_array = np.zeros((h + pad_h * 2), dtype=np.int32)
        w_array = np.zeros((w + pad_w * 2), dtype=np.int32)
        for poly in polys:
            poly = np.round(poly, decimals=0).astype(np.int32)
            minx = np.min(poly[:, 0])
            maxx = np.max(poly[:, 0])
            w_array[minx + pad_w:maxx + pad_w] = 1
            miny = np.min(poly[:, 1])
            maxy = np.max(poly[:, 1])
            h_array[miny + pad_h:maxy + pad_h] = 1
        # ensure the cropped area does not cross a text region
        h_axis = np.where(h_array == 0)[0]
        w_axis = np.where(w_array == 0)[0]
        if len(h_axis) == 0 or len(w_axis) == 0:
            return im, polys, tags, txts

        for i in range(max_tries):
            xx = np.random.choice(w_axis, size=2)
            xmin = np.min(xx) - pad_w
            xmax = np.max(xx) - pad_w
            xmin = np.clip(xmin, 0, w - 1)
            xmax = np.clip(xmax, 0, w - 1)
            yy = np.random.choice(h_axis, size=2)
            ymin = np.min(yy) - pad_h
            ymax = np.max(yy) - pad_h
            ymin = np.clip(ymin, 0, h - 1)
            ymax = np.clip(ymax, 0, h - 1)
            if xmax - xmin < self.min_crop_side_ratio * w or \
                    ymax - ymin < self.min_crop_side_ratio * h:
                # area too small
                continue
            if polys.shape[0] != 0:
                poly_axis_in_area = (polys[:, :, 0] >= xmin)\
                    & (polys[:, :, 0] <= xmax)\
                    & (polys[:, :, 1] >= ymin)\
                    & (polys[:, :, 1] <= ymax)
                selected_polys = np.where(
                    np.sum(poly_axis_in_area, axis=1) == 4)[0]
            else:
                selected_polys = []

            if len(selected_polys) == 0:
                # no text in this area
                if crop_background:
                    im = im[ymin:ymax + 1, xmin:xmax + 1, :]
                    polys = []
                    tags = []
                    txts = []
                    return im, polys, tags, txts
                else:
                    continue

            im = im[ymin:ymax + 1, xmin:xmax + 1, :]
            polys = polys[selected_polys]
            tags = tags[selected_polys]
            txts_tmp = []
            for selected_poly in selected_polys:
                txts_tmp.append(txts[selected_poly])
            txts = txts_tmp
            polys[:, :, 0] -= xmin
            polys[:, :, 1] -= ymin
            return im, polys, tags, txts
        return im, polys, tags, txts

    def crop_background_infor(self, im, text_polys, text_tags, text_strs):
        im, text_polys, text_tags, text_strs = self.crop_area(
            im, text_polys, text_tags, text_strs, crop_background=True)
        if len(text_polys) > 0:
            return None
        # pad and resize image
        input_size = self.input_size
        im, ratio = self.preprocess(im)
        score_map = np.zeros((input_size, input_size), dtype=np.float32)
        geo_map = np.zeros((input_size, input_size, 9), dtype=np.float32)
        training_mask = np.ones((input_size, input_size), dtype=np.float32)
        return im, score_map, geo_map, training_mask

    def crop_foreground_infor(self, im, text_polys, text_tags, text_strs):
        im, text_polys, text_tags, text_strs = self.crop_area(
            im, text_polys, text_tags, text_strs, crop_background=False)
        if text_polys.shape[0] == 0:
            return None
        # skip the sample if every remaining box is ignored
        if np.sum((text_tags * 1.0)) >= text_tags.size:
            return None
        # pad and resize image
        input_size = self.input_size
        im, ratio = self.preprocess(im)
        text_polys[:, :, 0] *= ratio
        text_polys[:, :, 1] *= ratio
        _, _, new_h, new_w = im.shape
        # print(im.shape)
        # self.draw_img_polys(im, text_polys)
        score_map, geo_map, training_mask = self.generate_quad(
            (new_h, new_w), text_polys, text_tags)
        return im, score_map, geo_map, training_mask

    def __call__(self, label_infor):
        infor = self.convert_label_infor(label_infor)
        im_path, text_polys, text_tags, text_strs = infor
        im = cv2.imread(im_path)
        if im is None:
            return None
        if text_polys.shape[0] == 0:
            return None
        # add rotated cases
        if np.random.rand() < 0.5:
            im, text_polys = self.rotate_im_poly(im, text_polys)
        h, w, _ = im.shape
        text_polys, text_tags = self.check_and_validate_polys(text_polys,
                                                              text_tags, h, w)
        if text_polys.shape[0] == 0:
            return None

        # random scale this image
        rd_scale = np.random.choice(self.random_scale)
        im = cv2.resize(im, dsize=None, fx=rd_scale, fy=rd_scale)
        text_polys *= rd_scale
        if np.random.rand() < self.background_ratio:
            outs = self.crop_background_infor(im, text_polys, text_tags,
                                              text_strs)
        else:
            outs = self.crop_foreground_infor(im, text_polys, text_tags,
                                              text_strs)

        if outs is None:
            return None
        im, score_map, geo_map, training_mask = outs
        score_map = score_map[np.newaxis, ::4, ::4].astype(np.float32)
        geo_map = np.swapaxes(geo_map, 1, 2)
        geo_map = np.swapaxes(geo_map, 1, 0)
        geo_map = geo_map[:, ::4, ::4].astype(np.float32)
        training_mask = training_mask[np.newaxis, ::4, ::4]
        training_mask = training_mask.astype(np.float32)
        return im, score_map, geo_map, training_mask


class EASTProcessTest(object):
    def __init__(self, params):
        super(EASTProcessTest, self).__init__()
        if 'max_side_len' in params:
            self.max_side_len = params['max_side_len']
        else:
            self.max_side_len = 2400

    def resize_image(self, im):
        """
        resize image to a size multiple of 32 which is required by the network
        :param im: the image to be resized
        :param max_side_len: limit of max image size to avoid out of memory in gpu
        :return: the resized image and the resize ratio
        """
        max_side_len = self.max_side_len
        h, w, _ = im.shape

        resize_w = w
        resize_h = h

        # limit the max side
        if max(resize_h, resize_w) > max_side_len:
            if resize_h > resize_w:
                ratio = float(max_side_len) / resize_h
            else:
                ratio = float(max_side_len) / resize_w
        else:
            ratio = 1.
        resize_h = int(resize_h * ratio)
        resize_w = int(resize_w * ratio)
        if resize_h % 32 != 0:
            resize_h = (resize_h // 32 - 1) * 32
        if resize_w % 32 != 0:
            resize_w = (resize_w // 32 - 1) * 32
        im = cv2.resize(im, (int(resize_w), int(resize_h)))
        ratio_h = resize_h / float(h)
        ratio_w = resize_w / float(w)
        return im, (ratio_h, ratio_w)

    def __call__(self, im):
        im, (ratio_h, ratio_w) = self.resize_image(im)
        img_mean = [0.485, 0.456, 0.406]
        img_std = [0.229, 0.224, 0.225]
        im = im[:, :, ::-1].astype(np.float32)
        im = im / 255
        im -= img_mean
        im /= img_std
        im = im.transpose((2, 0, 1))
        im = im[np.newaxis, :]
        return [im, (ratio_h, ratio_w)]
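One detail worth flagging: EASTProcessTest.resize_image rounds a non-multiple side down with (side // 32 - 1) * 32, i.e. it skips past the nearest lower multiple, while DBProcessTest above rounds up. For a 720 px side:

side = 720
east = side if side % 32 == 0 else (side // 32 - 1) * 32  # 672, not 704
db = side if side % 32 == 0 else (side // 32 + 1) * 32    # 736
print(east, db)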
@ -0,0 +1,147 @@
|
|||
# -*- coding:utf-8 -*-
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import numpy as np
|
||||
import cv2
|
||||
np.seterr(divide='ignore', invalid='ignore')
|
||||
import pyclipper
|
||||
from shapely.geometry import Polygon
|
||||
import sys
|
||||
import warnings
|
||||
warnings.simplefilter("ignore")
|
||||
|
||||
|
||||
def draw_border_map(polygon, canvas, mask, shrink_ratio):
|
||||
polygon = np.array(polygon)
|
||||
assert polygon.ndim == 2
|
||||
assert polygon.shape[1] == 2
|
||||
|
||||
polygon_shape = Polygon(polygon)
|
||||
if polygon_shape.area <= 0:
|
||||
return
|
||||
distance = polygon_shape.area * (
|
||||
1 - np.power(shrink_ratio, 2)) / polygon_shape.length
|
||||
subject = [tuple(l) for l in polygon]
|
||||
padding = pyclipper.PyclipperOffset()
|
||||
padding.AddPath(subject, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
|
||||
|
||||
padded_polygon = np.array(padding.Execute(distance)[0])
|
||||
cv2.fillPoly(mask, [padded_polygon.astype(np.int32)], 1.0)
|
||||
|
||||
xmin = padded_polygon[:, 0].min()
|
||||
    xmax = padded_polygon[:, 0].max()
    ymin = padded_polygon[:, 1].min()
    ymax = padded_polygon[:, 1].max()
    width = xmax - xmin + 1
    height = ymax - ymin + 1

    polygon[:, 0] = polygon[:, 0] - xmin
    polygon[:, 1] = polygon[:, 1] - ymin

    xs = np.broadcast_to(
        np.linspace(
            0, width - 1, num=width).reshape(1, width), (height, width))
    ys = np.broadcast_to(
        np.linspace(
            0, height - 1, num=height).reshape(height, 1), (height, width))

    distance_map = np.zeros((polygon.shape[0], height, width), dtype=np.float32)
    for i in range(polygon.shape[0]):
        j = (i + 1) % polygon.shape[0]
        absolute_distance = _distance(xs, ys, polygon[i], polygon[j])
        distance_map[i] = np.clip(absolute_distance / distance, 0, 1)
    distance_map = distance_map.min(axis=0)

    xmin_valid = min(max(0, xmin), canvas.shape[1] - 1)
    xmax_valid = min(max(0, xmax), canvas.shape[1] - 1)
    ymin_valid = min(max(0, ymin), canvas.shape[0] - 1)
    ymax_valid = min(max(0, ymax), canvas.shape[0] - 1)
    canvas[ymin_valid:ymax_valid + 1, xmin_valid:xmax_valid + 1] = np.fmax(
        1 - distance_map[ymin_valid - ymin:ymax_valid - ymax + height,
                         xmin_valid - xmin:xmax_valid - xmax + width],
        canvas[ymin_valid:ymax_valid + 1, xmin_valid:xmax_valid + 1])


def _distance(xs, ys, point_1, point_2):
    '''
    compute the distance from each grid point to a line segment
    ys: coordinates in the first axis
    xs: coordinates in the second axis
    point_1, point_2: (x, y), the two ends of the line segment
    '''
    height, width = xs.shape[:2]
    square_distance_1 = np.square(xs - point_1[0]) + np.square(ys - point_1[1])
    square_distance_2 = np.square(xs - point_2[0]) + np.square(ys - point_2[1])
    square_distance = np.square(point_1[0] - point_2[0]) + np.square(
        point_1[1] - point_2[1])

    # law of cosines: the sine of the angle at the grid point gives the
    # perpendicular distance to the segment
    cosin = (square_distance - square_distance_1 - square_distance_2) / (
        2 * np.sqrt(square_distance_1 * square_distance_2))
    square_sin = 1 - np.square(cosin)
    square_sin = np.nan_to_num(square_sin)
    result = np.sqrt(square_distance_1 * square_distance_2 * square_sin /
                     square_distance)

    # outside the segment, fall back to the distance to the nearer endpoint
    result[cosin < 0] = np.sqrt(
        np.fmin(square_distance_1, square_distance_2))[cosin < 0]
    # self.extend_line(point_1, point_2, result)
    return result


def extend_line(point_1, point_2, result, shrink_ratio):
    ex_point_1 = (
        int(round(point_1[0] + (point_1[0] - point_2[0]) * (1 + shrink_ratio))),
        int(round(point_1[1] + (point_1[1] - point_2[1]) * (1 + shrink_ratio))))
    cv2.line(
        result,
        tuple(ex_point_1),
        tuple(point_1),
        4096.0,
        1,
        lineType=cv2.LINE_AA,
        shift=0)
    ex_point_2 = (
        int(round(point_2[0] + (point_2[0] - point_1[0]) * (1 + shrink_ratio))),
        int(round(point_2[1] + (point_2[1] - point_1[1]) * (1 + shrink_ratio))))
    cv2.line(
        result,
        tuple(ex_point_2),
        tuple(point_2),
        4096.0,
        1,
        lineType=cv2.LINE_AA,
        shift=0)
    return ex_point_1, ex_point_2


def MakeBorderMap(data):
    shrink_ratio = 0.4
    thresh_min = 0.3
    thresh_max = 0.7

    im = data['image']
    text_polys = data['polys']
    ignore_tags = data['ignore_tags']

    canvas = np.zeros(im.shape[:2], dtype=np.float32)
    mask = np.zeros(im.shape[:2], dtype=np.float32)

    for i in range(len(text_polys)):
        if ignore_tags[i]:
            continue
        draw_border_map(
            text_polys[i], canvas, mask=mask, shrink_ratio=shrink_ratio)
    canvas = canvas * (thresh_max - thresh_min) + thresh_min

    data['threshold_map'] = canvas
    data['threshold_mask'] = mask
    return data
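
A quick, hedged sanity check of the stage above (the image size, rectangle, and sample values are toy data made up for illustration; draw_border_map is assumed from earlier in this file):

import numpy as np

sample = {
    'image': np.zeros((100, 100, 3), dtype=np.uint8),
    'polys': np.array([[[20., 20.], [80., 20.], [80., 40.], [20., 40.]]]),
    'ignore_tags': [False],
}
out = MakeBorderMap(sample)
# the normalized border distances are rescaled into [thresh_min, thresh_max]
assert 0.3 <= out['threshold_map'].min() <= out['threshold_map'].max() <= 0.7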
@ -0,0 +1,88 @@
# -*- coding:utf-8 -*-

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import numpy as np
import cv2
from shapely.geometry import Polygon
import pyclipper


def validate_polygons(polygons, ignore_tags, h, w):
    '''
    polygons (numpy.array, required): of shape (num_instances, num_points, 2)
    '''
    if len(polygons) == 0:
        return polygons, ignore_tags
    assert len(polygons) == len(ignore_tags)
    for polygon in polygons:
        polygon[:, 0] = np.clip(polygon[:, 0], 0, w - 1)
        polygon[:, 1] = np.clip(polygon[:, 1], 0, h - 1)

    for i in range(len(polygons)):
        area = polygon_area(polygons[i])
        if abs(area) < 1:
            ignore_tags[i] = True
        if area > 0:
            # unify the winding direction of all kept polygons
            polygons[i] = polygons[i][::-1, :]
    return polygons, ignore_tags


def polygon_area(polygon):
    # shoelace formula: accumulates twice the signed area of the polygon
    edge = 0
    for i in range(polygon.shape[0]):
        next_index = (i + 1) % polygon.shape[0]
        edge += (polygon[next_index, 0] - polygon[i, 0]) * (
            polygon[next_index, 1] + polygon[i, 1])

    return edge / 2.


def MakeShrinkMap(data):
    min_text_size = 8
    shrink_ratio = 0.4

    image = data['image']
    text_polys = data['polys']
    ignore_tags = data['ignore_tags']

    h, w = image.shape[:2]
    text_polys, ignore_tags = validate_polygons(text_polys, ignore_tags, h, w)
    gt = np.zeros((h, w), dtype=np.float32)
    # gt = np.zeros((1, h, w), dtype=np.float32)
    mask = np.ones((h, w), dtype=np.float32)
    for i in range(len(text_polys)):
        polygon = text_polys[i]
        height = max(polygon[:, 1]) - min(polygon[:, 1])
        width = max(polygon[:, 0]) - min(polygon[:, 0])
        # height = min(np.linalg.norm(polygon[0] - polygon[3]),
        #              np.linalg.norm(polygon[1] - polygon[2]))
        # width = min(np.linalg.norm(polygon[0] - polygon[1]),
        #             np.linalg.norm(polygon[2] - polygon[3]))
        if ignore_tags[i] or min(height, width) < min_text_size:
            cv2.fillPoly(mask, polygon.astype(np.int32)[np.newaxis, :, :], 0)
            ignore_tags[i] = True
        else:
            polygon_shape = Polygon(polygon)
            distance = polygon_shape.area * (
                1 - np.power(shrink_ratio, 2)) / polygon_shape.length
            subject = [tuple(l) for l in text_polys[i]]
            padding = pyclipper.PyclipperOffset()
            padding.AddPath(subject, pyclipper.JT_ROUND,
                            pyclipper.ET_CLOSEDPOLYGON)
            shrinked = padding.Execute(-distance)
            if shrinked == []:
                cv2.fillPoly(mask,
                             polygon.astype(np.int32)[np.newaxis, :, :], 0)
                ignore_tags[i] = True
                continue
            shrinked = np.array(shrinked[0]).reshape(-1, 2)
            cv2.fillPoly(gt, [shrinked.astype(np.int32)], 1)
            # cv2.fillPoly(gt[0], [shrinked.astype(np.int32)], 1)

    data['shrink_map'] = gt
    data['shrink_mask'] = mask
    return data
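
The offset passed to pyclipper above is the shrink distance D = A * (1 - r^2) / L, where A and L are the polygon's area and perimeter and r is shrink_ratio; a worked example with illustrative numbers:

A = 100 * 20          # area of a 100 x 20 text box
L = 2 * (100 + 20)    # its perimeter
r = 0.4               # shrink_ratio
D = A * (1 - r ** 2) / L
print(D)              # 7.0 -> each edge moves 7 px inward before fillPoly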
@ -0,0 +1,155 @@
# -*- coding:utf-8 -*-

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import numpy as np
import cv2
import random


def is_poly_in_rect(poly, x, y, w, h):
    poly = np.array(poly)
    if poly[:, 0].min() < x or poly[:, 0].max() > x + w:
        return False
    if poly[:, 1].min() < y or poly[:, 1].max() > y + h:
        return False
    return True


def is_poly_outside_rect(poly, x, y, w, h):
    poly = np.array(poly)
    if poly[:, 0].max() < x or poly[:, 0].min() > x + w:
        return True
    if poly[:, 1].max() < y or poly[:, 1].min() > y + h:
        return True
    return False


def split_regions(axis):
    regions = []
    min_axis = 0
    for i in range(1, axis.shape[0]):
        if axis[i] != axis[i - 1] + 1:
            region = axis[min_axis:i]
            min_axis = i
            regions.append(region)
    return regions


def random_select(axis, max_size):
    xx = np.random.choice(axis, size=2)
    xmin = np.min(xx)
    xmax = np.max(xx)
    xmin = np.clip(xmin, 0, max_size - 1)
    xmax = np.clip(xmax, 0, max_size - 1)
    return xmin, xmax


def region_wise_random_select(regions, max_size):
    selected_index = list(np.random.choice(len(regions), 2))
    selected_values = []
    for index in selected_index:
        axis = regions[index]
        xx = int(np.random.choice(axis, size=1))
        selected_values.append(xx)
    xmin = min(selected_values)
    xmax = max(selected_values)
    return xmin, xmax


def crop_area(im, text_polys, min_crop_side_ratio, max_tries):
    h, w, _ = im.shape
    h_array = np.zeros(h, dtype=np.int32)
    w_array = np.zeros(w, dtype=np.int32)
    for points in text_polys:
        points = np.round(points, decimals=0).astype(np.int32)
        minx = np.min(points[:, 0])
        maxx = np.max(points[:, 0])
        w_array[minx:maxx] = 1
        miny = np.min(points[:, 1])
        maxy = np.max(points[:, 1])
        h_array[miny:maxy] = 1
    # ensure the cropped area does not cut across a text instance
    h_axis = np.where(h_array == 0)[0]
    w_axis = np.where(w_array == 0)[0]

    if len(h_axis) == 0 or len(w_axis) == 0:
        return 0, 0, w, h

    h_regions = split_regions(h_axis)
    w_regions = split_regions(w_axis)

    for i in range(max_tries):
        if len(w_regions) > 1:
            xmin, xmax = region_wise_random_select(w_regions, w)
        else:
            xmin, xmax = random_select(w_axis, w)
        if len(h_regions) > 1:
            ymin, ymax = region_wise_random_select(h_regions, h)
        else:
            ymin, ymax = random_select(h_axis, h)

        if xmax - xmin < min_crop_side_ratio * w or ymax - ymin < min_crop_side_ratio * h:
            # area too small
            continue
        num_poly_in_rect = 0
        for poly in text_polys:
            if not is_poly_outside_rect(poly, xmin, ymin, xmax - xmin,
                                        ymax - ymin):
                num_poly_in_rect += 1
                break

        if num_poly_in_rect > 0:
            return xmin, ymin, xmax - xmin, ymax - ymin

    return 0, 0, w, h


def RandomCropData(data, size):
    max_tries = 10
    min_crop_side_ratio = 0.1
    require_original_image = False
    keep_ratio = True

    im = data['image']
    text_polys = data['polys']
    ignore_tags = data['ignore_tags']
    texts = data['texts']
    all_care_polys = [
        text_polys[i] for i, tag in enumerate(ignore_tags) if not tag
    ]
    # compute the crop region
    crop_x, crop_y, crop_w, crop_h = crop_area(im, all_care_polys,
                                               min_crop_side_ratio, max_tries)
    # crop the image and pad it to the target size, keeping the aspect ratio
    scale_w = size[0] / crop_w
    scale_h = size[1] / crop_h
    scale = min(scale_w, scale_h)
    h = int(crop_h * scale)
    w = int(crop_w * scale)
    if keep_ratio:
        padimg = np.zeros((size[1], size[0], im.shape[2]), im.dtype)
        padimg[:h, :w] = cv2.resize(
            im[crop_y:crop_y + crop_h, crop_x:crop_x + crop_w], (w, h))
        img = padimg
    else:
        img = cv2.resize(im[crop_y:crop_y + crop_h, crop_x:crop_x + crop_w],
                         tuple(size))
    # crop the text boxes
    text_polys_crop = []
    ignore_tags_crop = []
    texts_crop = []
    for poly, text, tag in zip(text_polys, texts, ignore_tags):
        poly = ((poly - (crop_x, crop_y)) * scale).tolist()
        if not is_poly_outside_rect(poly, 0, 0, w, h):
            text_polys_crop.append(poly)
            ignore_tags_crop.append(tag)
            texts_crop.append(text)
    data['image'] = img
    data['polys'] = np.array(text_polys_crop)
    data['ignore_tags'] = ignore_tags_crop
    data['texts'] = texts_crop
    return data
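
These crop/label transforms are meant to be chained on one sample dict; a minimal sketch (the image size, polygon, and 640 x 640 target are invented for illustration, and MakeShrinkMap / MakeBorderMap come from the sibling modules above):

import numpy as np

sample = {
    'image': np.zeros((720, 1280, 3), dtype=np.uint8),
    'polys': np.array([[[100., 100.], [300., 100.], [300., 160.], [100., 160.]]]),
    'texts': ['hello'],
    'ignore_tags': [False],
}
sample = RandomCropData(sample, size=[640, 640])
sample = MakeShrinkMap(sample)   # adds shrink_map / shrink_mask
sample = MakeBorderMap(sample)   # adds threshold_map / threshold_mask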
@ -0,0 +1,81 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import random
import numpy as np

import paddle
from ppocr.utils.utility import create_module
from copy import deepcopy

from .rec.img_tools import process_image
import cv2

import sys
import signal


# handle a terminated reader process without printing a stack frame
def _reader_quit(signum, frame):
    print("Reader process exit.")
    sys.exit()


def _term_group(sig_num, frame):
    print('pid {} terminated, terminate group '
          '{}...'.format(os.getpid(), os.getpgrp()))
    os.killpg(os.getpgid(os.getpid()), signal.SIGKILL)


signal.signal(signal.SIGTERM, _reader_quit)
signal.signal(signal.SIGINT, _term_group)


def reader_main(config=None, mode=None):
    """Create a batch reader for training, evaluation or testing.

    Args:
        config: the parsed configuration dict
        mode: one of "train", "eval" or "test"

    Returns:
        a batch reader for the given mode
    """
    assert mode in ["train", "eval", "test"],\
        "Unsupported mode: {}".format(mode)
    global_params = config['Global']
    if mode == "train":
        params = deepcopy(config['TrainReader'])
    elif mode == "eval":
        params = deepcopy(config['EvalReader'])
    else:
        params = deepcopy(config['TestReader'])
    params['mode'] = mode
    params.update(global_params)
    reader_function = params['reader_function']
    function = create_module(reader_function)(params)
    if mode == "train":
        readers = []
        num_workers = params['num_workers']
        for process_id in range(num_workers):
            readers.append(function(process_id))
        return paddle.reader.multiprocess_reader(readers, False)
    else:
        return function(mode)


def test_reader(image_shape, img_path):
    img = cv2.imread(img_path)
    norm_img = process_image(img, image_shape)
    return norm_img
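
A hedged usage sketch for reader_main; the config dict below is a hand-rolled stand-in for the parsed YAML, and the 'module,ClassName' string format is assumed from how create_module is used in this codebase. The chosen reader class would additionally expect its own keys (paths, char_ops, image shape, batch sizes, and so on):

config = {
    'Global': {},   # merged into the reader params via params.update(...)
    'TrainReader': {
        'reader_function': 'ppocr.data.rec.dataset_traversal,SimpleReader',
        'num_workers': 2,
        # ... remaining SimpleReader params go here or in Global ...
    },
}
train_reader = reader_main(config=config, mode='train')
# train_reader is a paddle multiprocess reader; iterate it to get batches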
@ -0,0 +1,13 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
@ -0,0 +1,201 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import math
import random
import numpy as np
import cv2

import string
import lmdb

from ppocr.utils.utility import initial_logger
logger = initial_logger()

from .img_tools import process_image, get_img_data


class LMDBReader(object):
    def __init__(self, params):
        if params['mode'] != 'train':
            self.num_workers = 1
        else:
            self.num_workers = params['num_workers']
        self.lmdb_sets_dir = params['lmdb_sets_dir']
        self.char_ops = params['char_ops']
        self.image_shape = params['image_shape']
        self.loss_type = params['loss_type']
        self.max_text_length = params['max_text_length']
        self.mode = params['mode']
        if params['mode'] == 'train':
            self.batch_size = params['train_batch_size_per_card']
        else:
            self.batch_size = params['test_batch_size_per_card']

    def load_hierarchical_lmdb_dataset(self):
        lmdb_sets = {}
        dataset_idx = 0
        for dirpath, dirnames, filenames in os.walk(self.lmdb_sets_dir + '/'):
            if not dirnames:
                env = lmdb.open(
                    dirpath,
                    max_readers=32,
                    readonly=True,
                    lock=False,
                    readahead=False,
                    meminit=False)
                txn = env.begin(write=False)
                num_samples = int(txn.get('num-samples'.encode()))
                lmdb_sets[dataset_idx] = {"dirpath": dirpath, "env": env, \
                    "txn": txn, "num_samples": num_samples}
                dataset_idx += 1
        return lmdb_sets

    def print_lmdb_sets_info(self, lmdb_sets):
        lmdb_info_strs = []
        for dataset_idx in range(len(lmdb_sets)):
            tmp_str = " %s:%d," % (lmdb_sets[dataset_idx]['dirpath'],
                                   lmdb_sets[dataset_idx]['num_samples'])
            lmdb_info_strs.append(tmp_str)
        lmdb_info_strs = ''.join(lmdb_info_strs)
        logger.info("DataSummary:" + lmdb_info_strs)
        return

    def close_lmdb_dataset(self, lmdb_sets):
        for dataset_idx in lmdb_sets:
            lmdb_sets[dataset_idx]['env'].close()
        return

    def get_lmdb_sample_info(self, txn, index):
        label_key = 'label-%09d'.encode() % index
        label = txn.get(label_key)
        if label is None:
            return None
        label = label.decode('utf-8')
        img_key = 'image-%09d'.encode() % index
        imgbuf = txn.get(img_key)
        img = get_img_data(imgbuf)
        if img is None:
            return None
        return img, label

    def __call__(self, process_id):
        if self.mode != 'train':
            process_id = 0

        def sample_iter_reader():
            lmdb_sets = self.load_hierarchical_lmdb_dataset()
            if process_id == 0:
                self.print_lmdb_sets_info(lmdb_sets)
            cur_index_sets = [1 + process_id] * len(lmdb_sets)
            while True:
                finish_read_num = 0
                for dataset_idx in range(len(lmdb_sets)):
                    cur_index = cur_index_sets[dataset_idx]
                    if cur_index > lmdb_sets[dataset_idx]['num_samples']:
                        finish_read_num += 1
                    else:
                        sample_info = self.get_lmdb_sample_info(
                            lmdb_sets[dataset_idx]['txn'], cur_index)
                        cur_index_sets[dataset_idx] += self.num_workers
                        if sample_info is None:
                            continue
                        img, label = sample_info
                        outs = process_image(img, self.image_shape, label,
                                             self.char_ops, self.loss_type,
                                             self.max_text_length)
                        if outs is None:
                            continue
                        yield outs

                if finish_read_num == len(lmdb_sets):
                    break
            self.close_lmdb_dataset(lmdb_sets)

        def batch_iter_reader():
            batch_outs = []
            for outs in sample_iter_reader():
                batch_outs.append(outs)
                if len(batch_outs) == self.batch_size:
                    yield batch_outs
                    batch_outs = []
            if len(batch_outs) != 0:
                yield batch_outs

        return batch_iter_reader


class SimpleReader(object):
    def __init__(self, params):
        if params['mode'] != 'train':
            self.num_workers = 1
        else:
            self.num_workers = params['num_workers']
        self.img_set_dir = params['img_set_dir']
        self.label_file_path = params['label_file_path']
        self.char_ops = params['char_ops']
        self.image_shape = params['image_shape']
        self.loss_type = params['loss_type']
        self.max_text_length = params['max_text_length']
        self.mode = params['mode']
        if params['mode'] == 'train':
            self.batch_size = params['train_batch_size_per_card']
        elif params['mode'] == 'eval':
            self.batch_size = params['test_batch_size_per_card']
        else:
            self.batch_size = 1
            self.infer_img = params['infer_img']

    def __call__(self, process_id):
        if self.mode != 'train':
            process_id = 0

        def sample_iter_reader():
            if self.mode == 'test':
                print("infer_img:", self.infer_img)
                img = cv2.imread(self.infer_img)
                norm_img = process_image(img, self.image_shape)
                yield norm_img
            with open(self.label_file_path, "rb") as fin:
                label_infor_list = fin.readlines()
            img_num = len(label_infor_list)
            img_id_list = list(range(img_num))
            random.shuffle(img_id_list)
            for img_id in range(process_id, img_num, self.num_workers):
                label_infor = label_infor_list[img_id_list[img_id]]
                substr = label_infor.decode('utf-8').strip("\n").split("\t")
                img_path = self.img_set_dir + "/" + substr[0]
                img = cv2.imread(img_path)
                if img is None:
                    continue
                label = substr[1]
                outs = process_image(img, self.image_shape, label,
                                     self.char_ops, self.loss_type,
                                     self.max_text_length)
                if outs is None:
                    continue
                yield outs

        def batch_iter_reader():
            batch_outs = []
            for outs in sample_iter_reader():
                batch_outs.append(outs)
                if len(batch_outs) == self.batch_size:
                    yield batch_outs
                    batch_outs = []
            if len(batch_outs) != 0:
                yield batch_outs

        return batch_iter_reader
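
The index bookkeeping in LMDBReader shards each LMDB round-robin across workers: worker p starts at record 1 + p and advances by num_workers, so no two workers read the same sample. For example, with 13 records and 4 workers:

num_workers = 4
# worker 0 reads 1, 5, 9, 13; worker 1 reads 2, 6, 10; and so on
shards = [list(range(1 + p, 14, num_workers)) for p in range(num_workers)]
print(shards)  # [[1, 5, 9, 13], [2, 6, 10], [3, 7, 11], [4, 8, 12]]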
@ -0,0 +1,92 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import math
import cv2
import numpy as np


def get_bounding_box_rect(pos):
    left = min(pos[0])
    right = max(pos[0])
    top = min(pos[1])
    bottom = max(pos[1])
    return [left, top, right, bottom]


def resize_norm_img(img, image_shape):
    imgC, imgH, imgW = image_shape
    h = img.shape[0]
    w = img.shape[1]
    ratio = w / float(h)
    if math.ceil(imgH * ratio) > imgW:
        resized_w = imgW
    else:
        resized_w = int(math.ceil(imgH * ratio))
    resized_image = cv2.resize(img, (resized_w, imgH))
    resized_image = resized_image.astype('float32')
    if image_shape[0] == 1:
        resized_image = resized_image / 255
        resized_image = resized_image[np.newaxis, :]
    else:
        resized_image = resized_image.transpose((2, 0, 1)) / 255
    resized_image -= 0.5
    resized_image /= 0.5
    padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32)
    padding_im[:, :, 0:resized_w] = resized_image
    return padding_im


def get_img_data(value):
    """get_img_data"""
    if not value:
        return None
    imgdata = np.frombuffer(value, dtype='uint8')
    if imgdata is None:
        return None
    imgori = cv2.imdecode(imgdata, 1)
    if imgori is None:
        return None
    return imgori


def process_image(img,
                  image_shape,
                  label=None,
                  char_ops=None,
                  loss_type=None,
                  max_text_length=None):
    norm_img = resize_norm_img(img, image_shape)
    norm_img = norm_img[np.newaxis, :]
    if label is not None:
        char_num = char_ops.get_char_num()
        text = char_ops.encode(label)
        if len(text) == 0 or len(text) > max_text_length:
            return None
        else:
            if loss_type == "ctc":
                text = text.reshape(-1, 1)
                return (norm_img, text)
            elif loss_type == "attention":
                beg_flag_idx = char_ops.get_beg_end_flag_idx("beg")
                end_flag_idx = char_ops.get_beg_end_flag_idx("end")
                beg_text = np.append(beg_flag_idx, text)
                end_text = np.append(text, end_flag_idx)
                beg_text = beg_text.reshape(-1, 1)
                end_text = end_text.reshape(-1, 1)
                return (norm_img, beg_text, end_text)
            else:
                assert False, "Unsupported loss_type %s in process_image"\
                    % loss_type
    return norm_img
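
A small usage sketch for process_image (the 3 x 32 x 100 shape is a typical recognition input, assumed here for illustration; the training path additionally needs a char_ops object providing encode() and get_char_num()):

import numpy as np

img = np.zeros((32, 100, 3), dtype=np.uint8)
# inference path: no label, returns the normalized image of shape (1, 3, 32, 100)
norm_img = process_image(img, image_shape=[3, 32, 100])
# training path with loss_type="ctc" returns (norm_img, text),
# where text is the encoded label reshaped to (len(label), 1)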
@ -0,0 +1,13 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
@ -0,0 +1,119 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from paddle import fluid

from ppocr.utils.utility import create_module
from ppocr.utils.utility import initial_logger
logger = initial_logger()
from copy import deepcopy


class DetModel(object):
    def __init__(self, params):
        """
        Detection model for OCR text detection.
        args:
            params (dict): the hyper-parameters for the detection model
        """
        global_params = params['Global']
        self.algorithm = global_params['algorithm']

        backbone_params = deepcopy(params["Backbone"])
        backbone_params.update(global_params)
        self.backbone = create_module(backbone_params['function'])\
            (params=backbone_params)

        head_params = deepcopy(params["Head"])
        head_params.update(global_params)
        self.head = create_module(head_params['function'])\
            (params=head_params)

        loss_params = deepcopy(params["Loss"])
        loss_params.update(global_params)
        self.loss = create_module(loss_params['function'])\
            (params=loss_params)

        self.image_shape = global_params['image_shape']

    def create_feed(self, mode):
        """
        create DataLoader feeds
        args:
            mode (str): 'train' for training, anything else for evaluation
        return: (image, corresponding labels, dataloader)
        """
        image_shape = deepcopy(self.image_shape)
        image = fluid.layers.data(
            name='image', shape=image_shape, dtype='float32')
        if mode == "train":
            if self.algorithm == "EAST":
                score = fluid.layers.data(
                    name='score', shape=[1, 128, 128], dtype='float32')
                geo = fluid.layers.data(
                    name='geo', shape=[9, 128, 128], dtype='float32')
                mask = fluid.layers.data(
                    name='mask', shape=[1, 128, 128], dtype='float32')
                feed_list = [image, score, geo, mask]
                labels = {'score': score, 'geo': geo, 'mask': mask}
            elif self.algorithm == "DB":
                shrink_map = fluid.layers.data(
                    name='shrink_map', shape=image_shape[1:], dtype='float32')
                shrink_mask = fluid.layers.data(
                    name='shrink_mask', shape=image_shape[1:], dtype='float32')
                threshold_map = fluid.layers.data(
                    name='threshold_map',
                    shape=image_shape[1:],
                    dtype='float32')
                threshold_mask = fluid.layers.data(
                    name='threshold_mask',
                    shape=image_shape[1:],
                    dtype='float32')
                feed_list = [image, shrink_map, shrink_mask,\
                    threshold_map, threshold_mask]
                labels = {'shrink_map': shrink_map,\
                    'shrink_mask': shrink_mask,\
                    'threshold_map': threshold_map,\
                    'threshold_mask': threshold_mask}
            loader = fluid.io.DataLoader.from_generator(
                feed_list=feed_list,
                capacity=64,
                use_double_buffer=True,
                iterable=False)
        else:
            labels = None
            loader = None
        return image, labels, loader

    def __call__(self, mode):
        """
        run the forward pass of the defined model
        args:
            mode (str): 'train' for training, 'export' for inference,
                anything else for evaluation
        """
        image, labels, loader = self.create_feed(mode)
        conv_feas = self.backbone(image)
        predicts = self.head(conv_feas)
        if mode == "train":
            losses = self.loss(predicts, labels)
            return loader, losses
        elif mode == "export":
            return [image, predicts]
        else:
            return loader, predicts
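
A rough sketch of how DetModel could be wired up under static-graph fluid (the program/guard boilerplate is standard Paddle 1.x usage; the params dict stands in for the parsed config):

import paddle.fluid as fluid

train_prog, startup_prog = fluid.Program(), fluid.Program()
with fluid.program_guard(train_prog, startup_prog):
    model = DetModel(params)          # params: config dict with Global/Backbone/Head/Loss
    loader, losses = model("train")   # builds the DataLoader plus the loss graph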
@ -0,0 +1,114 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from paddle import fluid

from ppocr.utils.utility import create_module
from ppocr.utils.utility import initial_logger
logger = initial_logger()
from copy import deepcopy


class RecModel(object):
    def __init__(self, params):
        super(RecModel, self).__init__()
        global_params = params['Global']
        char_num = global_params['char_ops'].get_char_num()
        global_params['char_num'] = char_num
        if "TPS" in params:
            tps_params = deepcopy(params["TPS"])
            tps_params.update(global_params)
            self.tps = create_module(tps_params['function'])\
                (params=tps_params)
        else:
            self.tps = None

        backbone_params = deepcopy(params["Backbone"])
        backbone_params.update(global_params)
        self.backbone = create_module(backbone_params['function'])\
            (params=backbone_params)

        head_params = deepcopy(params["Head"])
        head_params.update(global_params)
        self.head = create_module(head_params['function'])\
            (params=head_params)

        loss_params = deepcopy(params["Loss"])
        loss_params.update(global_params)
        self.loss = create_module(loss_params['function'])\
            (params=loss_params)

        self.loss_type = global_params['loss_type']
        self.image_shape = global_params['image_shape']
        self.max_text_length = global_params['max_text_length']

    def create_feed(self, mode):
        image_shape = deepcopy(self.image_shape)
        image_shape.insert(0, -1)
        image = fluid.data(name='image', shape=image_shape, dtype='float32')
        if mode == "train":
            if self.loss_type == "attention":
                label_in = fluid.data(
                    name='label_in',
                    shape=[None, 1],
                    dtype='int32',
                    lod_level=1)
                label_out = fluid.data(
                    name='label_out',
                    shape=[None, 1],
                    dtype='int32',
                    lod_level=1)
                feed_list = [image, label_in, label_out]
                labels = {'label_in': label_in, 'label_out': label_out}
            else:
                label = fluid.data(
                    name='label', shape=[None, 1], dtype='int32', lod_level=1)
                feed_list = [image, label]
                labels = {'label': label}
            loader = fluid.io.DataLoader.from_generator(
                feed_list=feed_list,
                capacity=64,
                use_double_buffer=True,
                iterable=False)
        else:
            labels = None
            loader = None
        return image, labels, loader

    def __call__(self, mode):
        image, labels, loader = self.create_feed(mode)
        if self.tps is None:
            inputs = image
        else:
            inputs = self.tps(image)
        conv_feas = self.backbone(inputs)
        predicts = self.head(conv_feas, labels, mode)
        decoded_out = predicts['decoded_out']
        if mode == "train":
            loss = self.loss(predicts, labels)
            if self.loss_type == "attention":
                label = labels['label_out']
            else:
                label = labels['label']
            outputs = {'total_loss': loss,
                       'decoded_out': decoded_out,
                       'label': label}
            return loader, outputs
        elif mode == "export":
            return [image, {'decoded_out': decoded_out}]
        else:
            return loader, {'decoded_out': decoded_out}
@ -0,0 +1,251 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle.fluid as fluid
from paddle.fluid.initializer import MSRA
from paddle.fluid.param_attr import ParamAttr

__all__ = ['MobileNetV3']


class MobileNetV3():
    def __init__(self, params):
        """
        The MobileNetV3 backbone network for the detection module.
        Args:
            params (dict): the hyper-parameters for building the network
        """
        self.scale = params['scale']
        model_name = params['model_name']
        self.inplanes = 16
        if model_name == "large":
            self.cfg = [
                # k, exp, c, se, nl, s,
                [3, 16, 16, False, 'relu', 1],
                [3, 64, 24, False, 'relu', 2],
                [3, 72, 24, False, 'relu', 1],
                [5, 72, 40, True, 'relu', 2],
                [5, 120, 40, True, 'relu', 1],
                [5, 120, 40, True, 'relu', 1],
                [3, 240, 80, False, 'hard_swish', 2],
                [3, 200, 80, False, 'hard_swish', 1],
                [3, 184, 80, False, 'hard_swish', 1],
                [3, 184, 80, False, 'hard_swish', 1],
                [3, 480, 112, True, 'hard_swish', 1],
                [3, 672, 112, True, 'hard_swish', 1],
                [5, 672, 160, True, 'hard_swish', 2],
                [5, 960, 160, True, 'hard_swish', 1],
                [5, 960, 160, True, 'hard_swish', 1],
            ]
            self.cls_ch_squeeze = 960
            self.cls_ch_expand = 1280
        elif model_name == "small":
            self.cfg = [
                # k, exp, c, se, nl, s,
                [3, 16, 16, True, 'relu', 2],
                [3, 72, 24, False, 'relu', 2],
                [3, 88, 24, False, 'relu', 1],
                [5, 96, 40, True, 'hard_swish', 2],
                [5, 240, 40, True, 'hard_swish', 1],
                [5, 240, 40, True, 'hard_swish', 1],
                [5, 120, 48, True, 'hard_swish', 1],
                [5, 144, 48, True, 'hard_swish', 1],
                [5, 288, 96, True, 'hard_swish', 2],
                [5, 576, 96, True, 'hard_swish', 1],
                [5, 576, 96, True, 'hard_swish', 1],
            ]
            self.cls_ch_squeeze = 576
            self.cls_ch_expand = 1280
        else:
            raise NotImplementedError("mode[" + model_name +
                                      "_model] is not implemented!")

        supported_scale = [0.35, 0.5, 0.75, 1.0, 1.25]
        assert self.scale in supported_scale, \
            "supported scale are {} but input scale is {}".format(supported_scale, self.scale)

    def __call__(self, input):
        scale = self.scale
        inplanes = self.inplanes
        cfg = self.cfg
        cls_ch_squeeze = self.cls_ch_squeeze
        cls_ch_expand = self.cls_ch_expand
        # conv1
        conv = self.conv_bn_layer(
            input,
            filter_size=3,
            num_filters=self.make_divisible(inplanes * scale),
            stride=2,
            padding=1,
            num_groups=1,
            if_act=True,
            act='hard_swish',
            name='conv1')
        i = 0
        inplanes = self.make_divisible(inplanes * scale)
        outs = []
        for layer_cfg in cfg:
            # collect the feature map right before each later stride-2 stage
            if layer_cfg[5] == 2 and i > 2:
                outs.append(conv)
            conv = self.residual_unit(
                input=conv,
                num_in_filter=inplanes,
                num_mid_filter=self.make_divisible(scale * layer_cfg[1]),
                num_out_filter=self.make_divisible(scale * layer_cfg[2]),
                act=layer_cfg[4],
                stride=layer_cfg[5],
                filter_size=layer_cfg[0],
                use_se=layer_cfg[3],
                name='conv' + str(i + 2))
            inplanes = self.make_divisible(scale * layer_cfg[2])
            i += 1

        conv = self.conv_bn_layer(
            input=conv,
            filter_size=1,
            num_filters=self.make_divisible(scale * cls_ch_squeeze),
            stride=1,
            padding=0,
            num_groups=1,
            if_act=True,
            act='hard_swish',
            name='conv_last')
        outs.append(conv)
        return outs

    def conv_bn_layer(self,
                      input,
                      filter_size,
                      num_filters,
                      stride,
                      padding,
                      num_groups=1,
                      if_act=True,
                      act=None,
                      name=None,
                      use_cudnn=True,
                      res_last_bn_init=False):
        conv = fluid.layers.conv2d(
            input=input,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            groups=num_groups,
            act=None,
            use_cudnn=use_cudnn,
            param_attr=ParamAttr(name=name + '_weights'),
            bias_attr=False)
        bn_name = name + '_bn'
        bn = fluid.layers.batch_norm(
            input=conv,
            param_attr=ParamAttr(
                name=bn_name + "_scale",
                regularizer=fluid.regularizer.L2DecayRegularizer(
                    regularization_coeff=0.0)),
            bias_attr=ParamAttr(
                name=bn_name + "_offset",
                regularizer=fluid.regularizer.L2DecayRegularizer(
                    regularization_coeff=0.0)),
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance')
        if if_act:
            if act == 'relu':
                bn = fluid.layers.relu(bn)
            elif act == 'hard_swish':
                bn = fluid.layers.hard_swish(bn)
        return bn

    def make_divisible(self, v, divisor=8, min_value=None):
        if min_value is None:
            min_value = divisor
        new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
        if new_v < 0.9 * v:
            new_v += divisor
        return new_v

    def se_block(self, input, num_out_filter, ratio=4, name=None):
        num_mid_filter = num_out_filter // ratio
        pool = fluid.layers.pool2d(
            input=input, pool_type='avg', global_pooling=True, use_cudnn=False)
        conv1 = fluid.layers.conv2d(
            input=pool,
            filter_size=1,
            num_filters=num_mid_filter,
            act='relu',
            param_attr=ParamAttr(name=name + '_1_weights'),
            bias_attr=ParamAttr(name=name + '_1_offset'))
        conv2 = fluid.layers.conv2d(
            input=conv1,
            filter_size=1,
            num_filters=num_out_filter,
            act='hard_sigmoid',
            param_attr=ParamAttr(name=name + '_2_weights'),
            bias_attr=ParamAttr(name=name + '_2_offset'))
        scale = fluid.layers.elementwise_mul(x=input, y=conv2, axis=0)
        return scale

    def residual_unit(self,
                      input,
                      num_in_filter,
                      num_mid_filter,
                      num_out_filter,
                      stride,
                      filter_size,
                      act=None,
                      use_se=False,
                      name=None):

        conv0 = self.conv_bn_layer(
            input=input,
            filter_size=1,
            num_filters=num_mid_filter,
            stride=1,
            padding=0,
            if_act=True,
            act=act,
            name=name + '_expand')

        conv1 = self.conv_bn_layer(
            input=conv0,
            filter_size=filter_size,
            num_filters=num_mid_filter,
            stride=stride,
            padding=int((filter_size - 1) // 2),
            if_act=True,
            act=act,
            num_groups=num_mid_filter,
            use_cudnn=False,
            name=name + '_depthwise')
        if use_se:
            conv1 = self.se_block(
                input=conv1, num_out_filter=num_mid_filter, name=name + '_se')

        conv2 = self.conv_bn_layer(
            input=conv1,
            filter_size=1,
            num_filters=num_out_filter,
            stride=1,
            padding=0,
            if_act=False,
            name=name + '_linear',
            res_last_bn_init=True)
        if num_in_filter != num_out_filter or stride != 1:
            return conv2
        else:
            return fluid.layers.elementwise_add(x=input, y=conv2, act=None)
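
make_divisible above snaps every scaled channel count to a multiple of 8 and refuses to shrink it by more than 10%; two worked examples at scale 0.5:

v = 72 * 0.5                          # 36.0
print(max(8, int(v + 4) // 8 * 8))    # 40, and 40 >= 0.9 * 36, so 40 channels
v = 16 * 0.5                          # 8.0
print(max(8, int(v + 4) // 8 * 8))    # 8, the stem stays at 8 channels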
@ -0,0 +1,252 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr

__all__ = ["ResNet"]


class ResNet(object):
    def __init__(self, params):
        """
        The ResNet backbone network for the detection module.
        Args:
            params (dict): the hyper-parameters for building the network
        """
        self.layers = params['layers']
        supported_layers = [18, 34, 50, 101, 152]
        assert self.layers in supported_layers, \
            "supported layers are {} but input layer is {}".format(supported_layers, self.layers)
        self.is_3x3 = True

    def __call__(self, input):
        layers = self.layers
        is_3x3 = self.is_3x3
        if layers == 18:
            depth = [2, 2, 2, 2]
        elif layers == 34 or layers == 50:
            depth = [3, 4, 6, 3]
        elif layers == 101:
            depth = [3, 4, 23, 3]
        elif layers == 152:
            depth = [3, 8, 36, 3]
        elif layers == 200:
            depth = [3, 12, 48, 3]
        num_filters = [64, 128, 256, 512]
        outs = []

        if not is_3x3:
            conv = self.conv_bn_layer(
                input=input,
                num_filters=64,
                filter_size=7,
                stride=2,
                act='relu')
        else:
            conv = self.conv_bn_layer(
                input=input,
                num_filters=32,
                filter_size=3,
                stride=2,
                act='relu',
                name='conv1_1')
            conv = self.conv_bn_layer(
                input=conv,
                num_filters=32,
                filter_size=3,
                stride=1,
                act='relu',
                name='conv1_2')
            conv = self.conv_bn_layer(
                input=conv,
                num_filters=64,
                filter_size=3,
                stride=1,
                act='relu',
                name='conv1_3')

        conv = fluid.layers.pool2d(
            input=conv,
            pool_size=3,
            pool_stride=2,
            pool_padding=1,
            pool_type='max')

        if layers >= 50:
            for block in range(len(depth)):
                for i in range(depth[block]):
                    if layers in [101, 152, 200] and block == 2:
                        if i == 0:
                            conv_name = "res" + str(block + 2) + "a"
                        else:
                            conv_name = "res" + str(block + 2) + "b" + str(i)
                    else:
                        conv_name = "res" + str(block + 2) + chr(97 + i)
                    conv = self.bottleneck_block(
                        input=conv,
                        num_filters=num_filters[block],
                        stride=2 if i == 0 and block != 0 else 1,
                        if_first=block == i == 0,
                        name=conv_name)
                outs.append(conv)
        else:
            for block in range(len(depth)):
                for i in range(depth[block]):
                    conv_name = "res" + str(block + 2) + chr(97 + i)
                    conv = self.basic_block(
                        input=conv,
                        num_filters=num_filters[block],
                        stride=2 if i == 0 and block != 0 else 1,
                        if_first=block == i == 0,
                        name=conv_name)
                outs.append(conv)
        return outs

    def conv_bn_layer(self,
                      input,
                      num_filters,
                      filter_size,
                      stride=1,
                      groups=1,
                      act=None,
                      name=None):
        conv = fluid.layers.conv2d(
            input=input,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=(filter_size - 1) // 2,
            groups=groups,
            act=None,
            param_attr=ParamAttr(name=name + "_weights"),
            bias_attr=False)
        if name == "conv1":
            bn_name = "bn_" + name
        else:
            bn_name = "bn" + name[3:]
        return fluid.layers.batch_norm(
            input=conv,
            act=act,
            param_attr=ParamAttr(name=bn_name + '_scale'),
            bias_attr=ParamAttr(bn_name + '_offset'),
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance')

    def conv_bn_layer_new(self,
                          input,
                          num_filters,
                          filter_size,
                          stride=1,
                          groups=1,
                          act=None,
                          name=None):
        pool = fluid.layers.pool2d(
            input=input,
            pool_size=2,
            pool_stride=2,
            pool_padding=0,
            pool_type='avg',
            ceil_mode=True)

        conv = fluid.layers.conv2d(
            input=pool,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=1,
            padding=(filter_size - 1) // 2,
            groups=groups,
            act=None,
            param_attr=ParamAttr(name=name + "_weights"),
            bias_attr=False)
        if name == "conv1":
            bn_name = "bn_" + name
        else:
            bn_name = "bn" + name[3:]
        return fluid.layers.batch_norm(
            input=conv,
            act=act,
            param_attr=ParamAttr(name=bn_name + '_scale'),
            bias_attr=ParamAttr(bn_name + '_offset'),
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance')

    def shortcut(self, input, ch_out, stride, name, if_first=False):
        ch_in = input.shape[1]
        if ch_in != ch_out or stride != 1:
            if if_first:
                return self.conv_bn_layer(input, ch_out, 1, stride, name=name)
            else:
                return self.conv_bn_layer_new(
                    input, ch_out, 1, stride, name=name)
        elif if_first:
            return self.conv_bn_layer(input, ch_out, 1, stride, name=name)
        else:
            return input

    def bottleneck_block(self, input, num_filters, stride, name, if_first):
        conv0 = self.conv_bn_layer(
            input=input,
            num_filters=num_filters,
            filter_size=1,
            act='relu',
            name=name + "_branch2a")
        conv1 = self.conv_bn_layer(
            input=conv0,
            num_filters=num_filters,
            filter_size=3,
            stride=stride,
            act='relu',
            name=name + "_branch2b")
        conv2 = self.conv_bn_layer(
            input=conv1,
            num_filters=num_filters * 4,
            filter_size=1,
            act=None,
            name=name + "_branch2c")

        short = self.shortcut(
            input,
            num_filters * 4,
            stride,
            if_first=if_first,
            name=name + "_branch1")

        return fluid.layers.elementwise_add(x=short, y=conv2, act='relu')

    def basic_block(self, input, num_filters, stride, name, if_first):
        conv0 = self.conv_bn_layer(
            input=input,
            num_filters=num_filters,
            filter_size=3,
            act='relu',
            stride=stride,
            name=name + "_branch2a")
        conv1 = self.conv_bn_layer(
            input=conv0,
            num_filters=num_filters,
            filter_size=3,
            act=None,
            name=name + "_branch2b")
        short = self.shortcut(
            input,
            num_filters,
            stride,
            if_first=if_first,
            name=name + "_branch1")
        return fluid.layers.elementwise_add(x=short, y=conv1, act='relu')
@ -0,0 +1,255 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle.fluid as fluid
from paddle.fluid.initializer import MSRA
from paddle.fluid.param_attr import ParamAttr

__all__ = [
    'MobileNetV3', 'MobileNetV3_small_x0_35', 'MobileNetV3_small_x0_5',
    'MobileNetV3_small_x0_75', 'MobileNetV3_small_x1_0',
    'MobileNetV3_small_x1_25', 'MobileNetV3_large_x0_35',
    'MobileNetV3_large_x0_5', 'MobileNetV3_large_x0_75',
    'MobileNetV3_large_x1_0', 'MobileNetV3_large_x1_25'
]


class MobileNetV3():
    def __init__(self, params):
        self.scale = params['scale']
        model_name = params['model_name']
        self.inplanes = 16
        if model_name == "large":
            self.cfg = [
                # k, exp, c, se, nl, s,
                [3, 16, 16, False, 'relu', 1],
                [3, 64, 24, False, 'relu', (2, 1)],
                [3, 72, 24, False, 'relu', 1],
                [5, 72, 40, True, 'relu', (2, 1)],
                [5, 120, 40, True, 'relu', 1],
                [5, 120, 40, True, 'relu', 1],
                [3, 240, 80, False, 'hard_swish', 1],
                [3, 200, 80, False, 'hard_swish', 1],
                [3, 184, 80, False, 'hard_swish', 1],
                [3, 184, 80, False, 'hard_swish', 1],
                [3, 480, 112, True, 'hard_swish', 1],
                [3, 672, 112, True, 'hard_swish', 1],
                [5, 672, 160, True, 'hard_swish', (2, 1)],
                [5, 960, 160, True, 'hard_swish', 1],
                [5, 960, 160, True, 'hard_swish', 1],
            ]
            self.cls_ch_squeeze = 960
            self.cls_ch_expand = 1280
        elif model_name == "small":
            self.cfg = [
                # k, exp, c, se, nl, s,
                [3, 16, 16, True, 'relu', (2, 1)],
                [3, 72, 24, False, 'relu', (2, 1)],
                [3, 88, 24, False, 'relu', 1],
                [5, 96, 40, True, 'hard_swish', (2, 1)],
                [5, 240, 40, True, 'hard_swish', 1],
                [5, 240, 40, True, 'hard_swish', 1],
                [5, 120, 48, True, 'hard_swish', 1],
                [5, 144, 48, True, 'hard_swish', 1],
                [5, 288, 96, True, 'hard_swish', (2, 1)],
                [5, 576, 96, True, 'hard_swish', 1],
                [5, 576, 96, True, 'hard_swish', 1],
            ]
            self.cls_ch_squeeze = 576
            self.cls_ch_expand = 1280
        else:
            raise NotImplementedError("mode[" + model_name +
                                      "_model] is not implemented!")

        supported_scale = [0.35, 0.5, 0.75, 1.0, 1.25]
        assert self.scale in supported_scale, \
            "supported scale are {} but input scale is {}".format(supported_scale, self.scale)

    def __call__(self, input):
        scale = self.scale
        inplanes = self.inplanes
        cfg = self.cfg
        cls_ch_squeeze = self.cls_ch_squeeze
        cls_ch_expand = self.cls_ch_expand
        # conv1
        conv = self.conv_bn_layer(
            input,
            filter_size=3,
            num_filters=self.make_divisible(inplanes * scale),
            stride=2,
            padding=1,
            num_groups=1,
            if_act=True,
            act='hard_swish',
            name='conv1')
        i = 0
        inplanes = self.make_divisible(inplanes * scale)
        for layer_cfg in cfg:
            conv = self.residual_unit(
                input=conv,
                num_in_filter=inplanes,
                num_mid_filter=self.make_divisible(scale * layer_cfg[1]),
                num_out_filter=self.make_divisible(scale * layer_cfg[2]),
                act=layer_cfg[4],
                stride=layer_cfg[5],
                filter_size=layer_cfg[0],
                use_se=layer_cfg[3],
                name='conv' + str(i + 2))
            inplanes = self.make_divisible(scale * layer_cfg[2])
            i += 1

        conv = self.conv_bn_layer(
            input=conv,
            filter_size=1,
            num_filters=self.make_divisible(scale * cls_ch_squeeze),
            stride=1,
            padding=0,
            num_groups=1,
            if_act=True,
            act='hard_swish',
            name='conv_last')

        conv = fluid.layers.pool2d(
            input=conv,
            pool_size=2,
            pool_stride=2,
            pool_padding=0,
            pool_type='max')
        return conv

    def conv_bn_layer(self,
                      input,
                      filter_size,
                      num_filters,
                      stride,
                      padding,
                      num_groups=1,
                      if_act=True,
                      act=None,
                      name=None,
                      use_cudnn=True,
                      res_last_bn_init=False):
        conv = fluid.layers.conv2d(
            input=input,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            groups=num_groups,
            act=None,
            use_cudnn=use_cudnn,
            param_attr=ParamAttr(name=name + '_weights'),
            bias_attr=False)
        bn_name = name + '_bn'
        bn = fluid.layers.batch_norm(
            input=conv,
            param_attr=ParamAttr(
                name=bn_name + "_scale",
                regularizer=fluid.regularizer.L2DecayRegularizer(
                    regularization_coeff=0.0)),
            bias_attr=ParamAttr(
                name=bn_name + "_offset",
                regularizer=fluid.regularizer.L2DecayRegularizer(
                    regularization_coeff=0.0)),
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance')
        if if_act:
            if act == 'relu':
                bn = fluid.layers.relu(bn)
            elif act == 'hard_swish':
                bn = fluid.layers.hard_swish(bn)
        return bn

    def make_divisible(self, v, divisor=8, min_value=None):
        if min_value is None:
            min_value = divisor
        new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
        if new_v < 0.9 * v:
            new_v += divisor
        return new_v

    def se_block(self, input, num_out_filter, ratio=4, name=None):
        num_mid_filter = num_out_filter // ratio
        pool = fluid.layers.pool2d(
            input=input, pool_type='avg', global_pooling=True, use_cudnn=False)
        conv1 = fluid.layers.conv2d(
            input=pool,
            filter_size=1,
            num_filters=num_mid_filter,
            act='relu',
            param_attr=ParamAttr(name=name + '_1_weights'),
            bias_attr=ParamAttr(name=name + '_1_offset'))
        conv2 = fluid.layers.conv2d(
            input=conv1,
            filter_size=1,
            num_filters=num_out_filter,
            act='hard_sigmoid',
            param_attr=ParamAttr(name=name + '_2_weights'),
            bias_attr=ParamAttr(name=name + '_2_offset'))
        scale = fluid.layers.elementwise_mul(x=input, y=conv2, axis=0)
        return scale

    def residual_unit(self,
                      input,
                      num_in_filter,
                      num_mid_filter,
                      num_out_filter,
                      stride,
                      filter_size,
                      act=None,
                      use_se=False,
                      name=None):

        conv0 = self.conv_bn_layer(
            input=input,
            filter_size=1,
            num_filters=num_mid_filter,
            stride=1,
            padding=0,
            if_act=True,
            act=act,
            name=name + '_expand')

        conv1 = self.conv_bn_layer(
            input=conv0,
            filter_size=filter_size,
            num_filters=num_mid_filter,
            stride=stride,
            padding=int((filter_size - 1) // 2),
            if_act=True,
            act=act,
            num_groups=num_mid_filter,
            use_cudnn=False,
            name=name + '_depthwise')
        if use_se:
            conv1 = self.se_block(
                input=conv1, num_out_filter=num_mid_filter, name=name + '_se')

        conv2 = self.conv_bn_layer(
            input=conv1,
            filter_size=1,
            num_filters=num_out_filter,
            stride=1,
            padding=0,
            if_act=False,
            name=name + '_linear',
            res_last_bn_init=True)
        if num_in_filter != num_out_filter or stride != 1:
            return conv2
        else:
            return fluid.layers.elementwise_add(x=input, y=conv2, act=None)
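
Note how this recognition variant replaces the detection backbone's plain stride-2 stages with (2, 1) strides, downsampling height while preserving width inside the stages; for an assumed 3 x 32 x 100 input to the "large" model:

# height: 32 -> 16 (conv1, stride 2) -> 8 -> 4 -> 2 (three (2,1) stages) -> 1 (final 2x2 max pool)
# width : 100 -> 50 (conv1, stride 2) -> 50 (stages keep width) -> 25 (final max pool)
# the backbone therefore emits a 1 x 25 feature sequence for the recognition head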
@ -0,0 +1,271 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math

import paddle
import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr

__all__ = [
    "ResNet", "ResNet18_vd", "ResNet34_vd", "ResNet50_vd", "ResNet101_vd",
    "ResNet152_vd", "ResNet200_vd"
]


class ResNet(object):
    def __init__(self, params):
        self.layers = params['layers']
        self.is_3x3 = True
        supported_layers = [18, 34, 50, 101, 152, 200]
        assert self.layers in supported_layers, \
            "supported layers are {} but input layer is {}".format(
                supported_layers, self.layers)

    def __call__(self, input):
        is_3x3 = self.is_3x3
        layers = self.layers

        if layers == 18:
            depth = [2, 2, 2, 2]
        elif layers == 34 or layers == 50:
            depth = [3, 4, 6, 3]
        elif layers == 101:
            depth = [3, 4, 23, 3]
        elif layers == 152:
            depth = [3, 8, 36, 3]
        elif layers == 200:
            depth = [3, 12, 48, 3]
        num_filters = [64, 128, 256, 512]
        if not is_3x3:
            conv = self.conv_bn_layer(
                input=input,
                num_filters=64,
                filter_size=7,
                stride=1,
                act='relu')
        else:
            # the "vd" stem: three 3x3 convolutions instead of a single 7x7
            conv = self.conv_bn_layer(
                input=input,
                num_filters=32,
                filter_size=3,
                stride=1,
                act='relu',
                name='conv1_1')
            conv = self.conv_bn_layer(
                input=conv,
                num_filters=32,
                filter_size=3,
                stride=1,
                act='relu',
                name='conv1_2')
            conv = self.conv_bn_layer(
                input=conv,
                num_filters=64,
                filter_size=3,
                stride=1,
                act='relu',
                name='conv1_3')

        conv = fluid.layers.pool2d(
            input=conv,
            pool_size=3,
            pool_stride=2,
            pool_padding=1,
            pool_type='max')

        if layers >= 50:
            for block in range(len(depth)):
                for i in range(depth[block]):
                    if layers in [101, 152, 200] and block == 2:
                        if i == 0:
                            conv_name = "res" + str(block + 2) + "a"
                        else:
                            conv_name = "res" + str(block + 2) + "b" + str(i)
                    else:
                        conv_name = "res" + str(block + 2) + chr(97 + i)

                    if i == 0 and block != 0:
                        stride = (2, 1)
                    else:
                        stride = (1, 1)

                    conv = self.bottleneck_block(
                        input=conv,
                        num_filters=num_filters[block],
                        stride=stride,
                        if_first=block == i == 0,
                        name=conv_name)
        else:
            for block in range(len(depth)):
                for i in range(depth[block]):
                    conv_name = "res" + str(block + 2) + chr(97 + i)

                    if i == 0 and block != 0:
                        stride = (2, 1)
                    else:
                        stride = (1, 1)

                    conv = self.basic_block(
                        input=conv,
                        num_filters=num_filters[block],
                        stride=stride,
                        if_first=block == i == 0,
                        name=conv_name)

        conv = fluid.layers.pool2d(
            input=conv,
            pool_size=2,
            pool_stride=2,
            pool_padding=0,
            pool_type='max')

        return conv

    def conv_bn_layer(self,
                      input,
                      num_filters,
                      filter_size,
                      stride=1,
                      groups=1,
                      act=None,
                      name=None):
        conv = fluid.layers.conv2d(
            input=input,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=(filter_size - 1) // 2,
            groups=groups,
            act=None,
            param_attr=ParamAttr(name=name + "_weights"),
            bias_attr=False)
        if name == "conv1":
            bn_name = "bn_" + name
        else:
            bn_name = "bn" + name[3:]
        return fluid.layers.batch_norm(
            input=conv,
            act=act,
            param_attr=ParamAttr(name=bn_name + '_scale'),
            bias_attr=ParamAttr(bn_name + '_offset'),
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance')

    def conv_bn_layer_new(self,
                          input,
                          num_filters,
                          filter_size,
                          stride=1,
                          groups=1,
                          act=None,
                          name=None):
        # "vd" downsample: average-pool first, then a stride-1 convolution
        pool = fluid.layers.pool2d(
            input=input,
            pool_size=stride,
            pool_stride=stride,
            pool_padding=0,
            pool_type='avg',
            ceil_mode=True)

        conv = fluid.layers.conv2d(
            input=pool,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=1,
            padding=(filter_size - 1) // 2,
            groups=groups,
            act=None,
            param_attr=ParamAttr(name=name + "_weights"),
            bias_attr=False)

        if name == "conv1":
            bn_name = "bn_" + name
        else:
            bn_name = "bn" + name[3:]
        return fluid.layers.batch_norm(
            input=conv,
            act=act,
            param_attr=ParamAttr(name=bn_name + '_scale'),
            bias_attr=ParamAttr(bn_name + '_offset'),
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance')

    def shortcut(self, input, ch_out, stride, name, if_first=False):
        ch_in = input.shape[1]
        if ch_in != ch_out or stride[0] != 1:
            if if_first:
                return self.conv_bn_layer(input, ch_out, 1, stride, name=name)
            else:
                return self.conv_bn_layer_new(
                    input, ch_out, 1, stride, name=name)
        elif if_first:
            return self.conv_bn_layer(input, ch_out, 1, stride, name=name)
        else:
            return input

    def bottleneck_block(self, input, num_filters, stride, name, if_first):
        conv0 = self.conv_bn_layer(
            input=input,
            num_filters=num_filters,
            filter_size=1,
            act='relu',
            name=name + "_branch2a")
        conv1 = self.conv_bn_layer(
            input=conv0,
            num_filters=num_filters,
            filter_size=3,
            stride=stride,
            act='relu',
            name=name + "_branch2b")
        conv2 = self.conv_bn_layer(
            input=conv1,
            num_filters=num_filters * 4,
            filter_size=1,
            act=None,
            name=name + "_branch2c")

        short = self.shortcut(
            input,
            num_filters * 4,
            stride,
            if_first=if_first,
            name=name + "_branch1")

        return fluid.layers.elementwise_add(x=short, y=conv2, act='relu')

    def basic_block(self, input, num_filters, stride, name, if_first):
        conv0 = self.conv_bn_layer(
            input=input,
            num_filters=num_filters,
            filter_size=3,
            act='relu',
            stride=stride,
            name=name + "_branch2a")
        conv1 = self.conv_bn_layer(
            input=conv0,
            num_filters=num_filters,
            filter_size=3,
            act=None,
            name=name + "_branch2b")
        short = self.shortcut(
            input,
            num_filters,
            stride,
            if_first=if_first,
            name=name + "_branch1")
        return fluid.layers.elementwise_add(x=short, y=conv1, act='relu')
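
# Editor's note (added): the (2, 1) strides above downsample only the
# feature-map height. For an assumed 3 x 32 x 100 recognition input, the height
# collapses 32 -> 16 -> 8 -> 4 -> 2 -> 1 (stem pool, three strided stages, and
# the final pool), while the width stays long enough to be read out as a
# character sequence by the encoder.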
@@ -0,0 +1,95 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
import math


def get_para_bias_attr(l2_decay, k, name):
    regularizer = fluid.regularizer.L2Decay(l2_decay)
    stdv = 1.0 / math.sqrt(k * 1.0)
    initializer = fluid.initializer.Uniform(-stdv, stdv)
    para_attr = fluid.ParamAttr(
        regularizer=regularizer, initializer=initializer, name=name + "_w_attr")
    bias_attr = fluid.ParamAttr(
        regularizer=regularizer, initializer=initializer, name=name + "_b_attr")
    return [para_attr, bias_attr]


def conv_bn_layer(input,
                  num_filters,
                  filter_size,
                  stride=1,
                  groups=1,
                  act=None,
                  name=None):
    conv = fluid.layers.conv2d(
        input=input,
        num_filters=num_filters,
        filter_size=filter_size,
        stride=stride,
        padding=(filter_size - 1) // 2,
        groups=groups,
        act=None,
        param_attr=ParamAttr(name=name + "_weights"),
        bias_attr=False,
        name=name + '.conv2d')

    bn_name = "bn_" + name
    return fluid.layers.batch_norm(
        input=conv,
        act=act,
        name=bn_name + '.output',
        param_attr=ParamAttr(name=bn_name + '_scale'),
        bias_attr=ParamAttr(bn_name + '_offset'),
        moving_mean_name=bn_name + '_mean',
        moving_variance_name=bn_name + '_variance')


def deconv_bn_layer(input,
                    num_filters,
                    filter_size=4,
                    stride=2,
                    act='relu',
                    name=None):
    deconv = fluid.layers.conv2d_transpose(
        input=input,
        num_filters=num_filters,
        filter_size=filter_size,
        stride=stride,
        padding=1,
        act=None,
        param_attr=ParamAttr(name=name + "_weights"),
        bias_attr=False,
        name=name + '.deconv2d')
    bn_name = "bn_" + name
    return fluid.layers.batch_norm(
        input=deconv,
        act=act,
        name=bn_name + '.output',
        param_attr=ParamAttr(name=bn_name + '_scale'),
        bias_attr=ParamAttr(bn_name + '_offset'),
        moving_mean_name=bn_name + '_mean',
        moving_variance_name=bn_name + '_variance')


def create_tmp_var(program, name, dtype, shape, lod_level=0):
    return program.current_block().create_var(
        name=name, dtype=dtype, shape=shape, lod_level=lod_level)
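
# Editor's note (added): with filter_size=4, stride=2, padding=1, the transposed
# convolution in deconv_bn_layer exactly doubles the spatial size, since
# H_out = (H_in - 1) * 2 - 2 * 1 + 4 = 2 * H_in. The EAST U-Net-style decoder
# relies on this when merging feature maps of adjacent scales.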
@@ -0,0 +1,206 @@
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math

import paddle.fluid as fluid


class DBHead(object):
    """
    Differentiable Binarization (DB) head for text detection;
    see https://arxiv.org/abs/1911.08947
    args:
        params(dict): hyper-parameters for building the DB network
    """

    def __init__(self, params):
        self.k = params['k']
        self.inner_channels = params['inner_channels']
        self.C, self.H, self.W = params['image_shape']
        print(self.C, self.H, self.W)  # debug: log the configured input shape

    def binarize(self, x):
        conv1 = fluid.layers.conv2d(
            input=x,
            num_filters=self.inner_channels // 4,
            filter_size=3,
            padding=1,
            param_attr=fluid.initializer.MSRAInitializer(uniform=False),
            bias_attr=False)
        conv_bn1 = fluid.layers.batch_norm(
            input=conv1,
            param_attr=fluid.initializer.ConstantInitializer(value=1.0),
            bias_attr=fluid.initializer.ConstantInitializer(value=1e-4),
            act="relu")
        conv2 = fluid.layers.conv2d_transpose(
            input=conv_bn1,
            num_filters=self.inner_channels // 4,
            filter_size=2,
            stride=2,
            param_attr=fluid.initializer.MSRAInitializer(uniform=False),
            bias_attr=self._get_bias_attr(0.0004, conv_bn1.shape[1], "conv2"),
            act=None)
        conv_bn2 = fluid.layers.batch_norm(
            input=conv2,
            param_attr=fluid.initializer.ConstantInitializer(value=1.0),
            bias_attr=fluid.initializer.ConstantInitializer(value=1e-4),
            act="relu")
        conv3 = fluid.layers.conv2d_transpose(
            input=conv_bn2,
            num_filters=1,
            filter_size=2,
            stride=2,
            param_attr=fluid.initializer.MSRAInitializer(uniform=False),
            bias_attr=self._get_bias_attr(0.0004, conv_bn2.shape[1], "conv3"),
            act=None)
        out = fluid.layers.sigmoid(conv3)
        return out

    def thresh(self, x):
        conv1 = fluid.layers.conv2d(
            input=x,
            num_filters=self.inner_channels // 4,
            filter_size=3,
            padding=1,
            param_attr=fluid.initializer.MSRAInitializer(uniform=False),
            bias_attr=False)
        conv_bn1 = fluid.layers.batch_norm(
            input=conv1,
            param_attr=fluid.initializer.ConstantInitializer(value=1.0),
            bias_attr=fluid.initializer.ConstantInitializer(value=1e-4),
            act="relu")
        conv2 = fluid.layers.conv2d_transpose(
            input=conv_bn1,
            num_filters=self.inner_channels // 4,
            filter_size=2,
            stride=2,
            param_attr=fluid.initializer.MSRAInitializer(uniform=False),
            bias_attr=self._get_bias_attr(0.0004, conv_bn1.shape[1], "conv2"),
            act=None)
        conv_bn2 = fluid.layers.batch_norm(
            input=conv2,
            param_attr=fluid.initializer.ConstantInitializer(value=1.0),
            bias_attr=fluid.initializer.ConstantInitializer(value=1e-4),
            act="relu")
        conv3 = fluid.layers.conv2d_transpose(
            input=conv_bn2,
            num_filters=1,
            filter_size=2,
            stride=2,
            param_attr=fluid.initializer.MSRAInitializer(uniform=False),
            bias_attr=self._get_bias_attr(0.0004, conv_bn2.shape[1], "conv3"),
            act=None)
        out = fluid.layers.sigmoid(conv3)
        return out

    def _get_bias_attr(self, l2_decay, k, name, gradient_clip=None):
        regularizer = fluid.regularizer.L2Decay(l2_decay)
        stdv = 1.0 / math.sqrt(k * 1.0)
        initializer = fluid.initializer.Uniform(-stdv, stdv)
        bias_attr = fluid.ParamAttr(
            regularizer=regularizer,
            gradient_clip=gradient_clip,
            initializer=initializer,
            name=name + "_b_attr")
        return bias_attr

    def step_function(self, x, y):
        return fluid.layers.reciprocal(1 + fluid.layers.exp(-self.k * (x - y)))

    def __call__(self, conv_features, mode="train"):
        c2, c3, c4, c5 = conv_features
        param_attr = fluid.initializer.MSRAInitializer(uniform=False)
        in5 = fluid.layers.conv2d(
            input=c5,
            num_filters=self.inner_channels,
            filter_size=1,
            param_attr=param_attr,
            bias_attr=False)
        in4 = fluid.layers.conv2d(
            input=c4,
            num_filters=self.inner_channels,
            filter_size=1,
            param_attr=param_attr,
            bias_attr=False)
        in3 = fluid.layers.conv2d(
            input=c3,
            num_filters=self.inner_channels,
            filter_size=1,
            param_attr=param_attr,
            bias_attr=False)
        in2 = fluid.layers.conv2d(
            input=c2,
            num_filters=self.inner_channels,
            filter_size=1,
            param_attr=param_attr,
            bias_attr=False)

        out4 = fluid.layers.elementwise_add(
            x=fluid.layers.resize_nearest(
                input=in5, scale=2), y=in4)  # 1/16
        out3 = fluid.layers.elementwise_add(
            x=fluid.layers.resize_nearest(
                input=out4, scale=2), y=in3)  # 1/8
        out2 = fluid.layers.elementwise_add(
            x=fluid.layers.resize_nearest(
                input=out3, scale=2), y=in2)  # 1/4

        p5 = fluid.layers.conv2d(
            input=in5,
            num_filters=self.inner_channels // 4,
            filter_size=3,
            padding=1,
            param_attr=param_attr,
            bias_attr=False)
        p5 = fluid.layers.resize_nearest(input=p5, scale=8)
        p4 = fluid.layers.conv2d(
            input=out4,
            num_filters=self.inner_channels // 4,
            filter_size=3,
            padding=1,
            param_attr=param_attr,
            bias_attr=False)
        p4 = fluid.layers.resize_nearest(input=p4, scale=4)
        p3 = fluid.layers.conv2d(
            input=out3,
            num_filters=self.inner_channels // 4,
            filter_size=3,
            padding=1,
            param_attr=param_attr,
            bias_attr=False)
        p3 = fluid.layers.resize_nearest(input=p3, scale=2)
        p2 = fluid.layers.conv2d(
            input=out2,
            num_filters=self.inner_channels // 4,
            filter_size=3,
            padding=1,
            param_attr=param_attr,
            bias_attr=False)

        fuse = fluid.layers.concat(input=[p5, p4, p3, p2], axis=1)
        shrink_maps = self.binarize(fuse)
        if mode != "train":
            return shrink_maps
        threshold_maps = self.thresh(fuse)
        binary_maps = self.step_function(shrink_maps, threshold_maps)
        y = fluid.layers.concat(
            input=[shrink_maps, threshold_maps, binary_maps], axis=1)
        predicts = {}
        predicts['maps'] = y
        return predicts
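
if __name__ == "__main__":
    # Editor's sketch (added; not part of the original commit): step_function
    # above is the differentiable binarization B = 1 / (1 + exp(-k * (P - T)))
    # from the DB paper. A NumPy illustration with the default k = 50 and
    # assumed score values:
    import numpy as np
    k = 50
    p = np.array([0.30, 0.49, 0.51, 0.70])  # shrink-map scores (assumed)
    t = 0.5                                 # threshold-map value (assumed)
    b = 1.0 / (1.0 + np.exp(-k * (p - t)))
    print(b.round(4))  # [0. 0.3775 0.6225 1.] -- close to a hard threshold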
@@ -0,0 +1,116 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle.fluid as fluid
from ..common_functions import conv_bn_layer, deconv_bn_layer


class EASTHead(object):
    """
    EAST: An Efficient and Accurate Scene Text Detector;
    see https://arxiv.org/abs/1704.03155
    args:
        params(dict): hyper-parameters for building the network
    """

    def __init__(self, params):
        self.model_name = params['model_name']

    def unet_fusion(self, inputs):
        f = inputs[::-1]
        if self.model_name == "large":
            num_outputs = [128, 128, 128, 128]
        else:
            num_outputs = [64, 64, 64, 64]
        g = [None, None, None, None]
        h = [None, None, None, None]
        for i in range(4):
            if i == 0:
                h[i] = f[i]
            else:
                h[i] = fluid.layers.concat([g[i - 1], f[i]], axis=1)
            h[i] = conv_bn_layer(
                input=h[i],
                num_filters=num_outputs[i],
                filter_size=3,
                stride=1,
                act='relu',
                name="unet_h_%d" % (i))
            if i <= 2:
                # can be replaced with unpool
                g[i] = deconv_bn_layer(
                    input=h[i],
                    num_filters=num_outputs[i],
                    name="unet_g_%d" % (i))
            else:
                g[i] = conv_bn_layer(
                    input=h[i],
                    num_filters=num_outputs[i],
                    filter_size=3,
                    stride=1,
                    act='relu',
                    name="unet_g_%d" % (i))
        return g[3]

    def detector_header(self, f_common):
        if self.model_name == "large":
            num_outputs = [128, 64, 1, 8]
        else:
            num_outputs = [64, 32, 1, 8]
        f_det = conv_bn_layer(
            input=f_common,
            num_filters=num_outputs[0],
            filter_size=3,
            stride=1,
            act='relu',
            name="det_head1")
        f_det = conv_bn_layer(
            input=f_det,
            num_filters=num_outputs[1],
            filter_size=3,
            stride=1,
            act='relu',
            name="det_head2")
        # f_score: 1-channel text/non-text score map
        f_score = conv_bn_layer(
            input=f_det,
            num_filters=num_outputs[2],
            filter_size=1,
            stride=1,
            act=None,
            name="f_score")
        f_score = fluid.layers.sigmoid(f_score)
        # f_geo: 8-channel geometry map
        f_geo = conv_bn_layer(
            input=f_det,
            num_filters=num_outputs[3],
            filter_size=1,
            stride=1,
            act=None,
            name="f_geo")
        f_geo = (fluid.layers.sigmoid(f_geo) - 0.5) * 2 * 800
        return f_score, f_geo

    def __call__(self, inputs):
        f_common = self.unet_fusion(inputs)
        f_score, f_geo = self.detector_header(f_common)
        predicts = {}
        predicts['f_score'] = f_score
        predicts['f_geo'] = f_geo
        return predicts
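
# Editor's note (added): (sigmoid(x) - 0.5) * 2 maps the raw geometry logits
# into (-1, 1), so the * 800 factor above bounds each predicted geometry offset
# to roughly +/-800 pixels; 800 appears to be an assumed upper limit on text
# extent rather than a value stated elsewhere in this commit.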
@@ -0,0 +1,232 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math

import paddle
import paddle.fluid as fluid
import paddle.fluid.layers as layers
from .rec_seq_encoder import SequenceEncoder
import numpy as np


class AttentionPredict(object):
    def __init__(self, params):
        super(AttentionPredict, self).__init__()
        self.char_num = params['char_num']
        self.encoder = SequenceEncoder(params)
        self.decoder_size = params['Attention']['decoder_size']
        self.word_vector_dim = params['Attention']['word_vector_dim']
        self.encoder_type = params['encoder_type']
        self.max_length = params['max_text_length']

    def simple_attention(self, encoder_vec, encoder_proj, decoder_state,
                         decoder_size):
        decoder_state_proj = layers.fc(input=decoder_state,
                                       size=decoder_size,
                                       bias_attr=False,
                                       name="decoder_state_proj_fc")
        decoder_state_expand = layers.sequence_expand(
            x=decoder_state_proj, y=encoder_proj)
        concated = layers.elementwise_add(encoder_proj, decoder_state_expand)
        concated = layers.tanh(x=concated)
        attention_weights = layers.fc(input=concated,
                                      size=1,
                                      act=None,
                                      bias_attr=False,
                                      name="attention_weights_fc")
        attention_weights = layers.sequence_softmax(input=attention_weights)
        weights_reshape = layers.reshape(x=attention_weights, shape=[-1])
        scaled = layers.elementwise_mul(
            x=encoder_vec, y=weights_reshape, axis=0)
        context = layers.sequence_pool(input=scaled, pool_type='sum')
        return context

    def gru_decoder_with_attention(self, target_embedding, encoder_vec,
                                   encoder_proj, decoder_boot, decoder_size,
                                   char_num):
        rnn = layers.DynamicRNN()
        with rnn.block():
            current_word = rnn.step_input(target_embedding)
            encoder_vec = rnn.static_input(encoder_vec)
            encoder_proj = rnn.static_input(encoder_proj)
            hidden_mem = rnn.memory(init=decoder_boot, need_reorder=True)
            context = self.simple_attention(encoder_vec, encoder_proj,
                                            hidden_mem, decoder_size)
            fc_1 = layers.fc(input=context,
                             size=decoder_size * 3,
                             bias_attr=False,
                             name="rnn_fc1")
            fc_2 = layers.fc(input=current_word,
                             size=decoder_size * 3,
                             bias_attr=False,
                             name="rnn_fc2")
            decoder_inputs = fc_1 + fc_2
            h, _, _ = layers.gru_unit(
                input=decoder_inputs, hidden=hidden_mem, size=decoder_size * 3)
            rnn.update_memory(hidden_mem, h)
            out = layers.fc(input=h,
                            size=char_num,
                            bias_attr=True,
                            act='softmax',
                            name="rnn_out_fc")
            rnn.output(out)
        return rnn()

    def gru_attention_infer(self, decoder_boot, max_length, char_num,
                            word_vector_dim, encoded_vector, encoded_proj,
                            decoder_size):
        init_state = decoder_boot
        beam_size = 1
        array_len = layers.fill_constant(
            shape=[1], dtype='int64', value=max_length)
        counter = layers.zeros(shape=[1], dtype='int64', force_cpu=True)

        # fill the first element with init_state
        state_array = layers.create_array('float32')
        layers.array_write(init_state, array=state_array, i=counter)

        # ids, scores as memory
        ids_array = layers.create_array('int64')
        scores_array = layers.create_array('float32')
        rois_shape = layers.shape(init_state)
        batch_size = layers.slice(
            rois_shape, axes=[0], starts=[0], ends=[1]) + 1
        lod_level = layers.range(
            start=0, end=batch_size, step=1, dtype=batch_size.dtype)

        init_ids = layers.fill_constant_batch_size_like(
            input=init_state, shape=[-1, 1], value=0, dtype='int64')
        init_ids = layers.lod_reset(init_ids, lod_level)
        init_ids = layers.lod_append(init_ids, lod_level)

        init_scores = layers.fill_constant_batch_size_like(
            input=init_state, shape=[-1, 1], value=1, dtype='float32')
        init_scores = layers.lod_reset(init_scores, init_ids)
        layers.array_write(init_ids, array=ids_array, i=counter)
        layers.array_write(init_scores, array=scores_array, i=counter)

        full_ids = fluid.layers.fill_constant_batch_size_like(
            input=init_state, shape=[-1, 1], dtype='int64', value=1)

        cond = layers.less_than(x=counter, y=array_len)
        while_op = layers.While(cond=cond)
        with while_op.block():
            pre_ids = layers.array_read(array=ids_array, i=counter)
            pre_state = layers.array_read(array=state_array, i=counter)
            pre_score = layers.array_read(array=scores_array, i=counter)
            pre_ids_emb = layers.embedding(
                input=pre_ids,
                size=[char_num, word_vector_dim],
                dtype='float32')

            context = self.simple_attention(encoded_vector, encoded_proj,
                                            pre_state, decoder_size)

            # expand the recursive_sequence_lengths of pre_state
            # to be the same as pre_score
            pre_state_expanded = layers.sequence_expand(pre_state, pre_score)
            context_expanded = layers.sequence_expand(context, pre_score)

            fc_1 = layers.fc(input=context_expanded,
                             size=decoder_size * 3,
                             bias_attr=False,
                             name="rnn_fc1")
            fc_2 = layers.fc(input=pre_ids_emb,
                             size=decoder_size * 3,
                             bias_attr=False,
                             name="rnn_fc2")

            decoder_inputs = fc_1 + fc_2
            current_state, _, _ = layers.gru_unit(
                input=decoder_inputs,
                hidden=pre_state_expanded,
                size=decoder_size * 3)
            current_state_with_lod = layers.lod_reset(
                x=current_state, y=pre_score)
            # use score to do beam search
            current_score = layers.fc(input=current_state_with_lod,
                                      size=char_num,
                                      bias_attr=True,
                                      act='softmax',
                                      name="rnn_out_fc")
            topk_scores, topk_indices = layers.topk(current_score, k=beam_size)

            new_ids = fluid.layers.concat([full_ids, topk_indices], axis=1)
            fluid.layers.assign(new_ids, full_ids)

            layers.increment(x=counter, value=1, in_place=True)

            # update the memories
            layers.array_write(current_state, array=state_array, i=counter)
            layers.array_write(topk_indices, array=ids_array, i=counter)
            layers.array_write(topk_scores, array=scores_array, i=counter)

            # update the break condition:
            # up to the max length or all candidates of
            # source sentences have ended.
            length_cond = layers.less_than(x=counter, y=array_len)
            finish_cond = layers.logical_not(layers.is_empty(x=topk_indices))
            layers.logical_and(x=length_cond, y=finish_cond, out=cond)
        return full_ids

    def __call__(self, inputs, labels=None, mode=None):
        encoder_features = self.encoder(inputs)
        char_num = self.char_num
        word_vector_dim = self.word_vector_dim
        decoder_size = self.decoder_size

        if self.encoder_type == "reshape":
            encoder_input = encoder_features
            encoded_vector = encoder_features
        else:
            encoder_input = encoder_features[1]
            encoded_vector = layers.concat(encoder_features, axis=1)
        encoded_proj = layers.fc(input=encoded_vector,
                                 size=decoder_size,
                                 bias_attr=False,
                                 name="encoded_proj_fc")
        backward_first = layers.sequence_pool(
            input=encoder_input, pool_type='first')
        decoder_boot = layers.fc(input=backward_first,
                                 size=decoder_size,
                                 bias_attr=False,
                                 act="relu",
                                 name='decoder_boot')

        if mode == "train":
            label_in = labels['label_in']
            label_out = labels['label_out']
            label_in = layers.cast(x=label_in, dtype='int64')
            trg_embedding = layers.embedding(
                input=label_in,
                size=[char_num, word_vector_dim],
                dtype='float32')
            predict = self.gru_decoder_with_attention(
                trg_embedding, encoded_vector, encoded_proj, decoder_boot,
                decoder_size, char_num)
            _, decoded_out = layers.topk(input=predict, k=1)
            decoded_out = layers.lod_reset(decoded_out, y=label_out)
            predicts = {'predict': predict, 'decoded_out': decoded_out}
        else:
            ids = self.gru_attention_infer(
                decoder_boot, self.max_length, char_num, word_vector_dim,
                encoded_vector, encoded_proj, decoder_size)
            predicts = {'decoded_out': ids}
        return predicts
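
# Editor's note (added): beam_size is fixed to 1 in gru_attention_infer, so the
# layers.topk / array bookkeeping above degenerates to greedy decoding -- at
# each step only the single best character is kept and appended to full_ids.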
@@ -0,0 +1,51 @@
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math

import paddle
import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
from .rec_seq_encoder import SequenceEncoder
from ..common_functions import get_para_bias_attr
import numpy as np


class CTCPredict(object):
    def __init__(self, params):
        super(CTCPredict, self).__init__()
        self.char_num = params['char_num']
        self.encoder = SequenceEncoder(params)
        self.encoder_type = params['encoder_type']

    def __call__(self, inputs, labels=None, mode=None):
        encoder_features = self.encoder(inputs)
        if self.encoder_type != "reshape":
            encoder_features = fluid.layers.concat(encoder_features, axis=1)
        name = "ctc_fc"
        para_attr, bias_attr = get_para_bias_attr(
            l2_decay=0.0004, k=encoder_features.shape[1], name=name)
        predict = fluid.layers.fc(input=encoder_features,
                                  size=self.char_num + 1,
                                  param_attr=para_attr,
                                  bias_attr=bias_attr,
                                  name=name)
        decoded_out = fluid.layers.ctc_greedy_decoder(
            input=predict, blank=self.char_num)
        predicts = {'predict': predict, 'decoded_out': decoded_out}
        return predicts
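
# Editor's note (added): the FC layer predicts char_num + 1 classes because CTC
# needs one extra "blank" label; blank=self.char_num in ctc_greedy_decoder marks
# that last index as the blank, matching the warpctc call in the CTC loss below.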
@@ -0,0 +1,100 @@
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math
import paddle.fluid as fluid
import paddle.fluid.layers as layers


class EncoderWithReshape(object):
    def __init__(self, params):
        super(EncoderWithReshape, self).__init__()

    def __call__(self, inputs):
        sliced_feature = layers.im2sequence(
            input=inputs,
            stride=[1, 1],
            filter_size=[inputs.shape[2], 1],
            name="sliced_feature")
        return sliced_feature


class EncoderWithRNN(object):
    def __init__(self, params):
        super(EncoderWithRNN, self).__init__()
        self.rnn_hidden_size = params['SeqRNN']['hidden_size']

    def __call__(self, inputs):
        lstm_list = []
        name_prefix = "lstm"
        rnn_hidden_size = self.rnn_hidden_size
        # two passes: no == 1 forward, no == 2 reverse (bidirectional encoding)
        for no in range(1, 3):
            if no == 1:
                is_reverse = False
            else:
                is_reverse = True
            name = "%s_st1_fc%d" % (name_prefix, no)
            fc = layers.fc(input=inputs,
                           size=rnn_hidden_size * 4,
                           param_attr=fluid.ParamAttr(name=name + "_w"),
                           bias_attr=fluid.ParamAttr(name=name + "_b"),
                           name=name)
            name = "%s_st1_out%d" % (name_prefix, no)
            lstm, _ = layers.dynamic_lstm(
                input=fc,
                size=rnn_hidden_size * 4,
                is_reverse=is_reverse,
                param_attr=fluid.ParamAttr(name=name + "_w"),
                bias_attr=fluid.ParamAttr(name=name + "_b"),
                use_peepholes=False)
            name = "%s_st2_fc%d" % (name_prefix, no)
            fc = layers.fc(input=lstm,
                           size=rnn_hidden_size * 4,
                           param_attr=fluid.ParamAttr(name=name + "_w"),
                           bias_attr=fluid.ParamAttr(name=name + "_b"),
                           name=name)
            name = "%s_st2_out%d" % (name_prefix, no)
            lstm, _ = layers.dynamic_lstm(
                input=fc,
                size=rnn_hidden_size * 4,
                is_reverse=is_reverse,
                param_attr=fluid.ParamAttr(name=name + "_w"),
                bias_attr=fluid.ParamAttr(name=name + "_b"),
                use_peepholes=False)
            lstm_list.append(lstm)
        return lstm_list


class SequenceEncoder(object):
    def __init__(self, params):
        super(SequenceEncoder, self).__init__()
        self.encoder_type = params['encoder_type']
        self.encoder_reshape = EncoderWithReshape(params)
        if self.encoder_type == "rnn":
            self.encoder_rnn = EncoderWithRNN(params)

    def __call__(self, inputs):
        if self.encoder_type == "reshape":
            encoder_features = self.encoder_reshape(inputs)
        elif self.encoder_type == "rnn":
            inputs = self.encoder_reshape(inputs)
            encoder_features = self.encoder_rnn(inputs)
        else:
            assert False, "Unsupported encoder_type: %s" % self.encoder_type
        return encoder_features
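
# Editor's note (added): im2sequence with filter_size=[H, 1] turns each
# one-pixel-wide column of the feature map into a time step; e.g. an assumed
# N x C x 1 x 25 backbone output becomes a sequence of 25 feature vectors that
# the stacked LSTMs (or the CTC/attention heads directly) consume.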
@@ -0,0 +1,116 @@
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np

import paddle.fluid as fluid


def BalanceLoss(pred,
                gt,
                mask,
                balance_loss=True,
                main_loss_type="DiceLoss",
                negative_ratio=3,
                return_origin=False,
                eps=1e-6):
    """
    The BalanceLoss for Differentiable Binarization text detection.
    args:
        pred (variable): predicted feature maps.
        gt (variable): ground truth feature maps.
        mask (variable): masked maps.
        balance_loss (bool): whether to balance the loss; default is True.
        main_loss_type (str): can only be one of ['CrossEntropy', 'DiceLoss',
            'Euclidean', 'BCELoss', 'MaskL1Loss']; default is 'DiceLoss'.
        negative_ratio (int|float): negative-to-positive sampling ratio;
            default is 3.
        return_origin (bool): whether to also return the unbalanced loss;
            default is False.
        eps (float): numerical-stability term; default is 1e-6.
    return: (variable) balanced loss
    """
    positive = gt * mask
    negative = (1 - gt) * mask

    positive_count = fluid.layers.reduce_sum(positive)
    positive_count_int = fluid.layers.cast(positive_count, dtype=np.int32)
    # editor's fix: use elementwise_min -- Python's built-in min() cannot
    # compare two graph variables
    negative_count = fluid.layers.elementwise_min(
        fluid.layers.reduce_sum(negative), positive_count * negative_ratio)
    negative_count_int = fluid.layers.cast(negative_count, dtype=np.int32)

    if main_loss_type == "CrossEntropy":
        loss = fluid.layers.cross_entropy(input=pred, label=gt, soft_label=True)
        loss = fluid.layers.reduce_mean(loss)
    elif main_loss_type == "Euclidean":
        loss = fluid.layers.square(pred - gt)
        loss = fluid.layers.reduce_mean(loss)
    elif main_loss_type == "DiceLoss":
        loss = DiceLoss(pred, gt, mask)
    elif main_loss_type == "BCELoss":
        loss = fluid.layers.sigmoid_cross_entropy_with_logits(pred, label=gt)
    elif main_loss_type == "MaskL1Loss":
        loss = MaskL1Loss(pred, gt, mask)
    else:
        loss_type = [
            'CrossEntropy', 'DiceLoss', 'Euclidean', 'BCELoss', 'MaskL1Loss'
        ]
        raise Exception("main_loss_type in BalanceLoss() can only be one of {}".
                        format(loss_type))

    if not balance_loss:
        return loss

    # online hard example mining: keep only the hardest negatives
    positive_loss = positive * loss
    negative_loss = negative * loss
    negative_loss = fluid.layers.reshape(negative_loss, shape=[-1])
    negative_loss, _ = fluid.layers.topk(negative_loss, k=negative_count_int)
    balance_loss = (fluid.layers.reduce_sum(positive_loss) +
                    fluid.layers.reduce_sum(negative_loss)) / (
                        positive_count + negative_count + eps)

    if return_origin:
        return balance_loss, loss
    return balance_loss


def DiceLoss(pred, gt, mask, weights=None, eps=1e-6):
    """
    DiceLoss function.
    """

    assert pred.shape == gt.shape
    assert pred.shape == mask.shape
    if weights is not None:
        assert weights.shape == mask.shape
        mask = weights * mask
    intersection = fluid.layers.reduce_sum(pred * gt * mask)

    union = fluid.layers.reduce_sum(pred * mask) + fluid.layers.reduce_sum(
        gt * mask) + eps
    loss = 1 - 2.0 * intersection / union
    assert loss <= 1
    return loss


def MaskL1Loss(pred, gt, mask, eps=1e-6):
    """
    Mask L1 Loss
    """
    loss = fluid.layers.reduce_sum((fluid.layers.abs(pred - gt) * mask)) / (
        fluid.layers.reduce_sum(mask) + eps)
    loss = fluid.layers.reduce_mean(loss)
    return loss
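
if __name__ == "__main__":
    # Editor's sketch (added; not part of the original commit): a NumPy mirror
    # of DiceLoss on toy 2x2 maps, to make the intersection/union formula
    # concrete.
    pred = np.array([[0.9, 0.1], [0.8, 0.2]])
    gt = np.array([[1.0, 0.0], [1.0, 0.0]])
    mask = np.ones_like(gt)
    eps = 1e-6
    intersection = (pred * gt * mask).sum()
    union = (pred * mask).sum() + (gt * mask).sum() + eps
    print(1 - 2.0 * intersection / union)  # ~0.15 -> small loss, good prediction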
@@ -0,0 +1,68 @@
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from .det_basic_loss import BalanceLoss, MaskL1Loss, DiceLoss


class DBLoss(object):
    """
    Differentiable Binarization (DB) Loss Function
    args:
        params (dict): hyper-parameters for DB Loss
    """

    def __init__(self, params):
        super(DBLoss, self).__init__()
        self.balance_loss = params['balance_loss']
        self.main_loss_type = params['main_loss_type']

        self.alpha = params['alpha']
        self.beta = params['beta']
        self.ohem_ratio = params['ohem_ratio']

    def __call__(self, predicts, labels):
        label_shrink_map = labels['shrink_map']
        label_shrink_mask = labels['shrink_mask']
        label_threshold_map = labels['threshold_map']
        label_threshold_mask = labels['threshold_mask']
        pred = predicts['maps']
        # channel 0: shrink map, channel 1: threshold map, channel 2: binary map
        shrink_maps = pred[:, 0, :, :]
        threshold_maps = pred[:, 1, :, :]
        binary_maps = pred[:, 2, :, :]

        loss_shrink_maps = BalanceLoss(
            shrink_maps,
            label_shrink_map,
            label_shrink_mask,
            balance_loss=self.balance_loss,
            main_loss_type=self.main_loss_type,
            negative_ratio=self.ohem_ratio)
        loss_threshold_maps = MaskL1Loss(threshold_maps, label_threshold_map,
                                         label_threshold_mask)
        loss_binary_maps = DiceLoss(binary_maps, label_shrink_map,
                                    label_shrink_mask)
        loss_shrink_maps = self.alpha * loss_shrink_maps
        loss_threshold_maps = self.beta * loss_threshold_maps

        loss_all = loss_shrink_maps + loss_threshold_maps + loss_binary_maps
        losses = {
            'total_loss': loss_all,
            'loss_shrink_maps': loss_shrink_maps,
            'loss_threshold_maps': loss_threshold_maps,
            'loss_binary_maps': loss_binary_maps
        }
        return losses
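
# Editor's note (added): with the det_db_icdar15 config above (alpha=5, beta=10),
# the total loss is 5 * L_shrink + 10 * L_threshold + L_binary, i.e. the
# threshold map gets the largest weight, consistent with the DB paper.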
@@ -0,0 +1,61 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle.fluid as fluid


class EASTLoss(object):
    """
    EAST Loss function
    """

    def __init__(self, params=None):
        super(EASTLoss, self).__init__()

    def __call__(self, predicts, labels):
        f_score = predicts['f_score']
        f_geo = predicts['f_geo']
        l_score = labels['score']
        l_geo = labels['geo']
        l_mask = labels['mask']
        # dice loss on the score map
        intersection = fluid.layers.reduce_sum(f_score * l_score * l_mask)
        union = fluid.layers.reduce_sum(f_score * l_mask) \
            + fluid.layers.reduce_sum(l_score * l_mask)
        dice_loss = 1 - 2 * intersection / (union + 1e-5)
        # smooth l1 loss on the geometry map
        channels = 8
        l_geo_split = fluid.layers.split(
            l_geo, num_or_sections=channels + 1, dim=1)
        f_geo_split = fluid.layers.split(f_geo, num_or_sections=channels, dim=1)
        smooth_l1 = 0
        for i in range(0, channels):
            geo_diff = l_geo_split[i] - f_geo_split[i]
            abs_geo_diff = fluid.layers.abs(geo_diff)
            smooth_l1_sign = fluid.layers.less_than(abs_geo_diff, l_score)
            smooth_l1_sign = fluid.layers.cast(smooth_l1_sign, dtype='float32')
            in_loss = abs_geo_diff * abs_geo_diff * smooth_l1_sign + \
                (abs_geo_diff - 0.5) * (1.0 - smooth_l1_sign)
            out_loss = l_geo_split[-1] / channels * in_loss * l_score
            smooth_l1 += out_loss
        smooth_l1_loss = fluid.layers.reduce_mean(smooth_l1 * l_score)
        dice_loss = dice_loss * 0.01
        total_loss = dice_loss + smooth_l1_loss
        losses = {
            'total_loss': total_loss,
            'dice_loss': dice_loss,
            'smooth_l1_loss': smooth_l1_loss
        }
        return losses
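
# Editor's note (added): the piecewise term above is a smooth-L1 variant --
# quadratic (diff^2) where the less_than gate fires and linear (|diff| - 0.5)
# elsewhere. Since l_score is a {0, 1} ground-truth map, the gate reduces to
# the usual |diff| < 1 switch inside text regions, and every term is masked by
# l_score anyway.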
@@ -0,0 +1,38 @@
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math

import paddle
import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
import numpy as np


class AttentionLoss(object):
    def __init__(self, params):
        super(AttentionLoss, self).__init__()
        self.char_num = params['char_num']

    def __call__(self, predicts, labels):
        predict = predicts['predict']
        label_out = labels['label_out']
        label_out = fluid.layers.cast(x=label_out, dtype='int64')
        cost = fluid.layers.cross_entropy(input=predict, label=label_out)
        sum_cost = fluid.layers.reduce_sum(cost)
        return sum_cost
@@ -0,0 +1,36 @@
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math

import paddle
import paddle.fluid as fluid


class CTCLoss(object):
    def __init__(self, params):
        super(CTCLoss, self).__init__()
        self.char_num = params['char_num']

    def __call__(self, predicts, labels):
        predict = predicts['predict']
        label = labels['label']
        cost = fluid.layers.warpctc(
            input=predict, label=label, blank=self.char_num, norm_by_times=True)
        sum_cost = fluid.layers.reduce_sum(cost)
        return sum_cost
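
# Editor's note (added): blank=self.char_num mirrors the char_num + 1 output
# size of CTCPredict -- the last class index is reserved for the CTC blank --
# and norm_by_times normalizes the cost by each sequence's number of time steps.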
@@ -0,0 +1,261 @@
#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
#Licensed under the Apache License, Version 2.0 (the "License");
|
||||
#you may not use this file except in compliance with the License.
|
||||
#You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
#Unless required by applicable law or agreed to in writing, software
|
||||
#distributed under the License is distributed on an "AS IS" BASIS,
|
||||
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
#See the License for the specific language governing permissions and
|
||||
#limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import math
|
||||
|
||||
import paddle.fluid as fluid
|
||||
import paddle.fluid.layers as layers
|
||||
from paddle.fluid.param_attr import ParamAttr
|
||||
import numpy as np
|
||||
|
||||
|
||||
class LocalizationNetwork(object):
|
||||
def __init__(self, params):
|
||||
super(LocalizationNetwork, self).__init__()
|
||||
self.F = params['num_fiducial']
|
||||
self.loc_lr = params['loc_lr']
|
||||
self.model_name = params['model_name']
|
||||
|
||||
def conv_bn_layer(self,
|
||||
input,
|
||||
num_filters,
|
||||
filter_size,
|
||||
stride=1,
|
||||
groups=1,
|
||||
act=None,
|
||||
name=None):
|
||||
conv = layers.conv2d(
|
||||
input=input,
|
||||
num_filters=num_filters,
|
||||
filter_size=filter_size,
|
||||
stride=stride,
|
||||
padding=(filter_size - 1) // 2,
|
||||
groups=groups,
|
||||
act=None,
|
||||
param_attr=ParamAttr(name=name + "_weights"),
|
||||
bias_attr=False)
|
||||
bn_name = "bn_" + name
|
||||
return layers.batch_norm(
|
||||
input=conv,
|
||||
act=act,
|
||||
param_attr=ParamAttr(name=bn_name + '_scale'),
|
||||
bias_attr=ParamAttr(bn_name + '_offset'),
|
||||
moving_mean_name=bn_name + '_mean',
|
||||
moving_variance_name=bn_name + '_variance')
|
||||
|
||||
def get_initial_fiducials(self):
|
||||
""" see RARE paper Fig. 6 (a) """
|
||||
F = self.F
|
||||
ctrl_pts_x = np.linspace(-1.0, 1.0, int(F / 2))
|
||||
ctrl_pts_y_top = np.linspace(0.0, -1.0, num=int(F / 2))
|
||||
ctrl_pts_y_bottom = np.linspace(1.0, 0.0, num=int(F / 2))
|
||||
ctrl_pts_top = np.stack([ctrl_pts_x, ctrl_pts_y_top], axis=1)
|
||||
ctrl_pts_bottom = np.stack([ctrl_pts_x, ctrl_pts_y_bottom], axis=1)
|
||||
initial_bias = np.concatenate([ctrl_pts_top, ctrl_pts_bottom], axis=0)
|
||||
return initial_bias
|
||||
|
||||
def __call__(self, image):
|
||||
F = self.F
|
||||
loc_lr = self.loc_lr
|
||||
if self.model_name == "large":
|
||||
num_filters_list = [64, 128, 256, 512]
|
||||
fc_dim = 256
|
||||
else:
|
||||
num_filters_list = [16, 32, 64, 128]
|
||||
fc_dim = 64
|
||||
for fno in range(len(num_filters_list)):
|
||||
num_filters = num_filters_list[fno]
|
||||
name = "loc_conv%d" % fno
|
||||
if fno == 0:
|
||||
conv = self.conv_bn_layer(
|
||||
image, num_filters, 3, act='relu', name=name)
|
||||
else:
|
||||
conv = self.conv_bn_layer(
|
||||
pool, num_filters, 3, act='relu', name=name)
|
||||
|
||||
if fno == len(num_filters_list) - 1:
|
||||
pool = layers.adaptive_pool2d(
|
||||
input=conv, pool_size=[1, 1], pool_type='avg')
|
||||
else:
|
||||
pool = layers.pool2d(
|
||||
input=conv,
|
||||
pool_size=2,
|
||||
pool_stride=2,
|
||||
pool_padding=0,
|
||||
pool_type='max')
|
||||
name = "loc_fc1"
|
||||
stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
|
||||
fc1 = layers.fc(input=pool,
|
||||
size=fc_dim,
|
||||
param_attr=fluid.param_attr.ParamAttr(
|
||||
learning_rate=loc_lr,
|
||||
initializer=fluid.initializer.Uniform(-stdv, stdv),
|
||||
name=name + "_w"),
|
||||
act='relu',
|
||||
name=name)
|
||||
|
||||
initial_bias = self.get_initial_fiducials()
|
||||
initial_bias = initial_bias.reshape(-1)
|
||||
name = "loc_fc2"
|
||||
param_attr = fluid.param_attr.ParamAttr(
|
||||
learning_rate=loc_lr,
|
||||
initializer=fluid.initializer.NumpyArrayInitializer(
|
||||
np.zeros([fc_dim, F * 2])),
|
||||
name=name + "_w")
|
||||
bias_attr = fluid.param_attr.ParamAttr(
|
||||
learning_rate=loc_lr,
|
||||
initializer=fluid.initializer.NumpyArrayInitializer(initial_bias),
|
||||
name=name + "_b")
|
||||
fc2 = layers.fc(input=fc1,
|
||||
size=F * 2,
|
||||
param_attr=param_attr,
|
||||
bias_attr=bias_attr,
|
||||
name=name)
|
||||
batch_C_prime = layers.reshape(x=fc2, shape=[-1, F, 2], inplace=False)
|
||||
return batch_C_prime
|
||||
|
||||
|
||||
class GridGenerator(object):
|
||||
def __init__(self, params):
|
||||
super(GridGenerator, self).__init__()
|
||||
self.eps = 1e-6
|
||||
self.F = params['num_fiducial']
|
||||
|
||||
def build_C(self):
|
||||
""" Return coordinates of fiducial points in I_r; C """
|
||||
F = self.F
|
||||
ctrl_pts_x = np.linspace(-1.0, 1.0, int(F / 2))
|
||||
ctrl_pts_y_top = -1 * np.ones(int(F / 2))
|
||||
ctrl_pts_y_bottom = np.ones(int(F / 2))
|
||||
ctrl_pts_top = np.stack([ctrl_pts_x, ctrl_pts_y_top], axis=1)
|
||||
ctrl_pts_bottom = np.stack([ctrl_pts_x, ctrl_pts_y_bottom], axis=1)
|
||||
C = np.concatenate([ctrl_pts_top, ctrl_pts_bottom], axis=0)
|
||||
return C # F x 2
|
||||
|
||||
def build_P(self, I_r_size):
|
||||
I_r_width, I_r_height = I_r_size
|
||||
I_r_grid_x = (np.arange(-I_r_width, I_r_width, 2) + 1.0)\
|
||||
/ I_r_width # self.I_r_width
|
||||
I_r_grid_y = (np.arange(-I_r_height, I_r_height, 2) + 1.0)\
|
||||
/ I_r_height # self.I_r_height
|
||||
# P: self.I_r_width x self.I_r_height x 2
|
||||
P = np.stack(np.meshgrid(I_r_grid_x, I_r_grid_y), axis=2)
|
||||
# n (= self.I_r_width x self.I_r_height) x 2
|
||||
return P.reshape([-1, 2])
|
||||
|
||||
def build_inv_delta_C(self, C):
|
||||
""" Return inv_delta_C which is needed to calculate T """
|
||||
F = self.F
|
||||
hat_C = np.zeros((F, F), dtype=float) # F x F
|
||||
for i in range(0, F):
|
||||
for j in range(i, F):
|
||||
r = np.linalg.norm(C[i] - C[j])
|
||||
hat_C[i, j] = r
|
||||
hat_C[j, i] = r
|
||||
np.fill_diagonal(hat_C, 1)
|
||||
hat_C = (hat_C**2) * np.log(hat_C)
|
||||
# print(C.shape, hat_C.shape)
|
||||
delta_C = np.concatenate( # F+3 x F+3
|
||||
[
|
||||
np.concatenate(
|
||||
[np.ones((F, 1)), C, hat_C], axis=1), # F x F+3
|
||||
np.concatenate(
|
||||
[np.zeros((2, 3)), np.transpose(C)], axis=1), # 2 x F+3
|
||||
np.concatenate(
|
||||
[np.zeros((1, 3)), np.ones((1, F))], axis=1) # 1 x F+3
|
||||
],
|
||||
axis=0)
|
||||
inv_delta_C = np.linalg.inv(delta_C)
|
||||
return inv_delta_C # F+3 x F+3
|
||||
|
||||
def build_P_hat(self, C, P):
|
||||
F = self.F
|
||||
eps = self.eps
|
||||
n = P.shape[0] # n (= self.I_r_width x self.I_r_height)
|
||||
#P_tile: n x 2 -> n x 1 x 2 -> n x F x 2
|
||||
P_tile = np.tile(np.expand_dims(P, axis=1), (1, F, 1))
|
||||
C_tile = np.expand_dims(C, axis=0) # 1 x F x 2
|
||||
P_diff = P_tile - C_tile # n x F x 2
|
||||
#rbf_norm: n x F
|
||||
rbf_norm = np.linalg.norm(P_diff, ord=2, axis=2, keepdims=False)
|
||||
#rbf: n x F
|
||||
rbf = np.multiply(np.square(rbf_norm), np.log(rbf_norm + eps))
|
||||
P_hat = np.concatenate([np.ones((n, 1)), P, rbf], axis=1)
|
||||
return P_hat # n x F+3
|
||||
|
||||
def get_expand_tensor(self, batch_C_prime):
|
||||
name = "ex_fc"
|
||||
initializer = fluid.initializer.ConstantInitializer(value=0.0)
|
||||
param_attr = fluid.param_attr.ParamAttr(
|
||||
learning_rate=0.0, initializer=initializer, name=name + "_w")
|
||||
bias_attr = fluid.param_attr.ParamAttr(
|
||||
learning_rate=0.0, initializer=initializer, name=name + "_b")
|
||||
batch_C_ex_part_tensor = fluid.layers.fc(input=batch_C_prime,
|
||||
size=6,
|
||||
param_attr=param_attr,
|
||||
bias_attr=bias_attr,
|
||||
name=name)
|
||||
batch_C_ex_part_tensor = fluid.layers.reshape(
|
||||
x=batch_C_ex_part_tensor, shape=[-1, 3, 2])
|
||||
return batch_C_ex_part_tensor
|
||||
|
||||
def __call__(self, batch_C_prime, I_r_size):
|
||||
C = self.build_C()
|
||||
P = self.build_P(I_r_size)
|
||||
inv_delta_C = self.build_inv_delta_C(C).astype('float32')
|
||||
P_hat = self.build_P_hat(C, P).astype('float32')
|
||||
|
||||
inv_delta_C_tensor = layers.create_tensor(dtype='float32')
|
||||
layers.assign(inv_delta_C, inv_delta_C_tensor)
|
||||
inv_delta_C_tensor.stop_gradient = True
|
||||
P_hat_tensor = layers.create_tensor(dtype='float32')
|
||||
layers.assign(P_hat, P_hat_tensor)
|
||||
P_hat_tensor.stop_gradient = True
|
||||
|
||||
batch_C_ex_part_tensor = self.get_expand_tensor(batch_C_prime)
|
||||
# batch_C_ex_part_tensor = create_tmp_var(
|
||||
# fluid.default_main_program(),
|
||||
# name='batch_C_ex_part_tensor',
|
||||
# dtype='float32', shape=[-1, 3, 2])
|
||||
# layers.py_func(func=get_batch_C_expand,
|
||||
# x=[batch_C_prime], out=[batch_C_ex_part_tensor])
|
||||
|
||||
batch_C_ex_part_tensor.stop_gradient = True
|
||||
|
||||
batch_C_prime_with_zeros = layers.concat(
|
||||
[batch_C_prime, batch_C_ex_part_tensor], axis=1)
|
||||
batch_T = layers.matmul(inv_delta_C_tensor, batch_C_prime_with_zeros)
|
||||
batch_P_prime = layers.matmul(P_hat_tensor, batch_T)
|
||||
return batch_P_prime
|
||||
|
||||
|
||||
class TPS(object):
|
||||
def __init__(self, params):
|
||||
super(TPS, self).__init__()
|
||||
self.loc_net = LocalizationNetwork(params)
|
||||
self.grid_generator = GridGenerator(params)
|
||||
|
||||
def __call__(self, image):
|
||||
batch_C_prime = self.loc_net(image)
|
||||
I_r_size = [image.shape[3], image.shape[2]]
|
||||
batch_P_prime = self.grid_generator(batch_C_prime, I_r_size)
|
||||
batch_P_prime = layers.reshape(
|
||||
x=batch_P_prime, shape=[-1, image.shape[2], image.shape[3], 2])
|
||||
batch_I_r = layers.grid_sampler(x=image, grid=batch_P_prime)
|
||||
image.stop_gradient = False
|
||||
return batch_I_r
|
|
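The generator above is the standard thin-plate-spline solve: once the fiducial system delta_C is built, the per-image transform is T = inv(delta_C) . [C'; 0_{3x2}] and the sampling grid is P' = P_hat . T. A minimal NumPy sketch of that final step, assuming inv_delta_C ((F+3) x (F+3)), P_hat (n x (F+3)) and the predicted control points C_prime (F x 2) are built exactly as in the code above; the function name is illustrative:

import numpy as np

def tps_sampling_grid(inv_delta_C, P_hat, C_prime):
    # Pad the F predicted control points with three zero rows so the
    # right-hand side matches the (F+3) x (F+3) system.
    C_prime_with_zeros = np.concatenate([C_prime, np.zeros((3, 2))], axis=0)
    T = inv_delta_C.dot(C_prime_with_zeros)  # (F+3) x 2 TPS coefficients
    return P_hat.dot(T)                      # n x 2 sampling grid P'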
@@ -0,0 +1,36 @@
|
|||
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
#Licensed under the Apache License, Version 2.0 (the "License");
|
||||
#you may not use this file except in compliance with the License.
|
||||
#You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
#Unless required by applicable law or agreed to in writing, software
|
||||
#distributed under the License is distributed on an "AS IS" BASIS,
|
||||
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
#See the License for the specific language governing permissions and
|
||||
#limitations under the License.
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
import paddle.fluid as fluid
|
||||
|
||||
|
||||
def AdamDecay(params, parameter_list=None):
|
||||
"""
|
||||
define optimizer function
|
||||
args:
|
||||
params(dict): the hyper-parameters
|
||||
parameter_list (list): list of Variables to update to minimize loss
|
||||
return:
|
||||
optimizer instance
|
||||
"""
|
||||
base_lr = params['base_lr']
|
||||
beta1 = params['beta1']
|
||||
beta2 = params['beta2']
|
||||
optimizer = fluid.optimizer.Adam(
|
||||
learning_rate=base_lr,
|
||||
beta1=beta1,
|
||||
beta2=beta2,
|
||||
parameter_list=parameter_list)
|
||||
return optimizer
|
|
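A brief usage sketch for the factory above; the hyper-parameter values are illustrative, in practice they come from the Optimizer section of the YAML config:

opt_params = {'base_lr': 0.001, 'beta1': 0.9, 'beta2': 0.999}
optimizer = AdamDecay(opt_params)
# inside the training program: optimizer.minimize(loss)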
@@ -0,0 +1,152 @@
|
|||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import paddle
|
||||
import paddle.fluid as fluid
|
||||
|
||||
import numpy as np
|
||||
import string
|
||||
import cv2
|
||||
from shapely.geometry import Polygon
|
||||
import pyclipper
|
||||
|
||||
|
||||
class DBPostProcess(object):
|
||||
"""
|
||||
The post process for Differentiable Binarization (DB).
|
||||
"""
|
||||
|
||||
def __init__(self, params):
|
||||
self.thresh = params['thresh']
|
||||
self.box_thresh = params['box_thresh']
|
||||
self.max_candidates = params['max_candidates']
|
||||
self.min_size = 3
|
||||
|
||||
def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
|
||||
'''
|
||||
_bitmap: single map with shape (1, H, W),
|
||||
whose values are binarized as {0, 1}
|
||||
'''
|
||||
|
||||
bitmap = _bitmap
|
||||
height, width = bitmap.shape
|
||||
|
||||
# img, contours, _ = cv2.findContours((bitmap * 255).astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
|
||||
contours, _ = cv2.findContours((bitmap * 255).astype(np.uint8),
|
||||
cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
|
||||
|
||||
num_contours = min(len(contours), self.max_candidates)
|
||||
boxes = np.zeros((num_contours, 4, 2), dtype=np.int16)
|
||||
scores = np.zeros((num_contours, ), dtype=np.float32)
|
||||
|
||||
for index in range(num_contours):
|
||||
contour = contours[index]
|
||||
points, sside = self.get_mini_boxes(contour)
|
||||
if sside < self.min_size:
|
||||
continue
|
||||
points = np.array(points)
|
||||
score = self.box_score_fast(pred, points.reshape(-1, 2))
|
||||
if self.box_thresh > score:
|
||||
continue
|
||||
|
||||
box = self.unclip(points).reshape(-1, 1, 2)
|
||||
box, sside = self.get_mini_boxes(box)
|
||||
if sside < self.min_size + 2:
|
||||
continue
|
||||
box = np.array(box)
|
||||
if not isinstance(dest_width, int):
|
||||
dest_width = dest_width.item()
|
||||
dest_height = dest_height.item()
|
||||
|
||||
box[:, 0] = np.clip(
|
||||
np.round(box[:, 0] / width * dest_width), 0, dest_width)
|
||||
box[:, 1] = np.clip(
|
||||
np.round(box[:, 1] / height * dest_height), 0, dest_height)
|
||||
boxes[index, :, :] = box.astype(np.int16)
|
||||
scores[index] = score
|
||||
return boxes, scores
|
||||
|
||||
def unclip(self, box, unclip_ratio=1.5):
|
||||
poly = Polygon(box)
|
||||
distance = poly.area * unclip_ratio / poly.length
|
||||
offset = pyclipper.PyclipperOffset()
|
||||
offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
|
||||
expanded = np.array(offset.Execute(distance))
|
||||
return expanded
|
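# The offset distance above follows the DB unclip rule D = A * r / L, where
# A is the polygon area, L its perimeter and r the unclip ratio. Example: a
# 10 x 4 box with r = 1.5 gives D = 40 * 1.5 / 28 ≈ 2.14, so each edge is
# pushed outwards by roughly two pixels before the box is re-fitted.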
||||
|
||||
def get_mini_boxes(self, contour):
|
||||
bounding_box = cv2.minAreaRect(contour)
|
||||
points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0])
|
||||
|
||||
index_1, index_2, index_3, index_4 = 0, 1, 2, 3
|
||||
if points[1][1] > points[0][1]:
|
||||
index_1 = 0
|
||||
index_4 = 1
|
||||
else:
|
||||
index_1 = 1
|
||||
index_4 = 0
|
||||
if points[3][1] > points[2][1]:
|
||||
index_2 = 2
|
||||
index_3 = 3
|
||||
else:
|
||||
index_2 = 3
|
||||
index_3 = 2
|
||||
|
||||
box = [
|
||||
points[index_1], points[index_2], points[index_3], points[index_4]
|
||||
]
|
||||
return box, min(bounding_box[1])
|
||||
|
||||
def box_score_fast(self, bitmap, _box):
|
||||
h, w = bitmap.shape[:2]
|
||||
box = _box.copy()
|
||||
# use the builtin int: np.int is deprecated and removed in newer NumPy
|
||||
xmin = np.clip(np.floor(box[:, 0].min()).astype(int), 0, w - 1)
|
||||
xmax = np.clip(np.ceil(box[:, 0].max()).astype(int), 0, w - 1)
|
||||
ymin = np.clip(np.floor(box[:, 1].min()).astype(int), 0, h - 1)
|
||||
ymax = np.clip(np.ceil(box[:, 1].max()).astype(int), 0, h - 1)
|
||||
|
||||
mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)
|
||||
box[:, 0] = box[:, 0] - xmin
|
||||
box[:, 1] = box[:, 1] - ymin
|
||||
cv2.fillPoly(mask, box.reshape(1, -1, 2).astype(np.int32), 1)
|
||||
return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0]
|
||||
|
||||
def __call__(self, outs_dict, ratio_list):
|
||||
pred = outs_dict['maps']
|
||||
pred = pred[:, 0, :, :]
|
||||
segmentation = pred > self.thresh
|
||||
|
||||
boxes_batch = []
|
||||
for batch_index in range(pred.shape[0]):
|
||||
height, width = pred.shape[-2:]
|
||||
tmp_boxes, tmp_scores = self.boxes_from_bitmap(
|
||||
pred[batch_index], segmentation[batch_index], width, height)
|
||||
|
||||
boxes = []
|
||||
for k in range(len(tmp_boxes)):
|
||||
if tmp_scores[k] > self.box_thresh:
|
||||
boxes.append(tmp_boxes[k])
|
||||
if len(boxes) > 0:
|
||||
boxes = np.array(boxes)
|
||||
|
||||
ratio_h, ratio_w = ratio_list[batch_index]
|
||||
boxes[:, :, 0] = boxes[:, :, 0] / ratio_w
|
||||
boxes[:, :, 1] = boxes[:, :, 1] / ratio_h
|
||||
|
||||
boxes_batch.append(boxes)
|
||||
return boxes_batch
|
|
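A minimal usage sketch for DBPostProcess; the parameter values are illustrative stand-ins for the PostProcess section of the config, and pred_maps / ratio_list are assumed to come from a detection forward pass:

post_params = {'thresh': 0.3, 'box_thresh': 0.7, 'max_candidates': 1000}
postprocess = DBPostProcess(post_params)
# pred_maps: probability maps of shape [N, 1, H, W] from the DB head;
# ratio_list: one (ratio_h, ratio_w) resize pair per image in the batch.
boxes_batch = postprocess({'maps': pred_maps}, ratio_list)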
@@ -0,0 +1,121 @@
|
|||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import numpy as np
|
||||
from .locality_aware_nms import nms_locality
|
||||
import cv2
|
||||
|
||||
|
||||
class EASTPostPocess(object):
|
||||
"""
|
||||
The post process for EAST.
|
||||
"""
|
||||
|
||||
def __init__(self, params):
|
||||
self.score_thresh = params['score_thresh']
|
||||
self.cover_thresh = params['cover_thresh']
|
||||
self.nms_thresh = params['nms_thresh']
|
||||
|
||||
def restore_rectangle_quad(self, origin, geometry):
|
||||
"""
|
||||
Restore rectangle from quadrangle.
|
||||
"""
|
||||
# quad
|
||||
origin_concat = np.concatenate(
|
||||
(origin, origin, origin, origin), axis=1) # (n, 8)
|
||||
pred_quads = origin_concat - geometry
|
||||
pred_quads = pred_quads.reshape((-1, 4, 2)) # (n, 4, 2)
|
||||
return pred_quads
|
||||
|
||||
def detect(self,
|
||||
score_map,
|
||||
geo_map,
|
||||
score_thresh=0.8,
|
||||
cover_thresh=0.1,
|
||||
nms_thresh=0.2):
|
||||
"""
|
||||
restore text boxes from score map and geo map
|
||||
"""
|
||||
score_map = score_map[0]
|
||||
geo_map = np.swapaxes(geo_map, 1, 0)
|
||||
geo_map = np.swapaxes(geo_map, 1, 2)
|
||||
# filter the score map
|
||||
xy_text = np.argwhere(score_map > score_thresh)
|
||||
if len(xy_text) == 0:
|
||||
return []
|
||||
# sort the text boxes via the y axis
|
||||
xy_text = xy_text[np.argsort(xy_text[:, 0])]
|
||||
#restore quad proposals
|
||||
text_box_restored = self.restore_rectangle_quad(
|
||||
xy_text[:, ::-1] * 4, geo_map[xy_text[:, 0], xy_text[:, 1], :])
|
||||
boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
|
||||
boxes[:, :8] = text_box_restored.reshape((-1, 8))
|
||||
boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]
|
||||
boxes = nms_locality(boxes.astype(np.float64), nms_thresh)
|
||||
if boxes.shape[0] == 0:
|
||||
return []
|
||||
# Here we filter some low score boxes by the average score map,
|
||||
# this is different from the original paper.
|
||||
for i, box in enumerate(boxes):
|
||||
mask = np.zeros_like(score_map, dtype=np.uint8)
|
||||
cv2.fillPoly(mask, box[:8].reshape(
|
||||
(-1, 4, 2)).astype(np.int32) // 4, 1)
|
||||
boxes[i, 8] = cv2.mean(score_map, mask)[0]
|
||||
boxes = boxes[boxes[:, 8] > cover_thresh]
|
||||
return boxes
|
||||
|
||||
def sort_poly(self, p):
|
||||
"""
|
||||
Sort polygons.
|
||||
"""
|
||||
min_axis = np.argmin(np.sum(p, axis=1))
|
||||
p = p[[min_axis, (min_axis + 1) % 4,\
|
||||
(min_axis + 2) % 4, (min_axis + 3) % 4]]
|
||||
if abs(p[0, 0] - p[1, 0]) > abs(p[0, 1] - p[1, 1]):
|
||||
return p
|
||||
else:
|
||||
return p[[0, 3, 2, 1]]
|
||||
|
||||
def __call__(self, outs_dict, ratio_list):
|
||||
score_list = outs_dict['f_score']
|
||||
geo_list = outs_dict['f_geo']
|
||||
img_num = len(ratio_list)
|
||||
dt_boxes_list = []
|
||||
for ino in range(img_num):
|
||||
score = score_list[ino]
|
||||
geo = geo_list[ino]
|
||||
boxes = self.detect(
|
||||
score_map=score,
|
||||
geo_map=geo,
|
||||
score_thresh=self.score_thresh,
|
||||
cover_thresh=self.cover_thresh,
|
||||
nms_thresh=self.nms_thresh)
|
||||
boxes_norm = []
|
||||
if len(boxes) > 0:
|
||||
ratio_h, ratio_w = ratio_list[ino]
|
||||
boxes = boxes[:, :8].reshape((-1, 4, 2))
|
||||
boxes[:, :, 0] /= ratio_w
|
||||
boxes[:, :, 1] /= ratio_h
|
||||
for i_box, box in enumerate(boxes):
|
||||
box = self.sort_poly(box.astype(np.int32))
|
||||
if np.linalg.norm(box[0] - box[1]) < 5 \
|
||||
or np.linalg.norm(box[3] - box[0]) < 5:
|
||||
continue
|
||||
boxes_norm.append(box)
|
||||
dt_boxes_list.append(np.array(boxes_norm))
|
||||
return dt_boxes_list
|
|
@@ -0,0 +1,199 @@
|
|||
"""
|
||||
Locality aware nms.
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
from shapely.geometry import Polygon
|
||||
|
||||
|
||||
def intersection(g, p):
|
||||
"""
|
||||
Intersection.
|
||||
"""
|
||||
g = Polygon(g[:8].reshape((4, 2)))
|
||||
p = Polygon(p[:8].reshape((4, 2)))
|
||||
g = g.buffer(0)
|
||||
p = p.buffer(0)
|
||||
if not g.is_valid or not p.is_valid:
|
||||
return 0
|
||||
inter = g.intersection(p).area
|
||||
union = g.area + p.area - inter
|
||||
if union == 0:
|
||||
return 0
|
||||
else:
|
||||
return inter / union
|
||||
|
||||
|
||||
def intersection_iog(g, p):
|
||||
"""
|
||||
Intersection_iog.
|
||||
"""
|
||||
g = Polygon(g[:8].reshape((4, 2)))
|
||||
p = Polygon(p[:8].reshape((4, 2)))
|
||||
if not g.is_valid or not p.is_valid:
|
||||
return 0
|
||||
inter = g.intersection(p).area
|
||||
#union = g.area + p.area - inter
|
||||
union = p.area
|
||||
if union == 0:
|
||||
print("p_area is very small")
|
||||
return 0
|
||||
else:
|
||||
return inter / union
|
||||
|
||||
|
||||
def weighted_merge(g, p):
|
||||
"""
|
||||
Weighted merge.
|
||||
"""
|
||||
g[:8] = (g[8] * g[:8] + p[8] * p[:8]) / (g[8] + p[8])
|
||||
g[8] = (g[8] + p[8])
|
||||
return g
|
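# Example: if g scores 0.9 and p scores 0.1, the merged coordinates are
# weighted 9:1 towards g and the merged score is 0.9 + 0.1 = 1.0, so chains
# of merges accumulate evidence before the final standard_nms pass.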
||||
|
||||
|
||||
def standard_nms(S, thres):
|
||||
"""
|
||||
Standard nms.
|
||||
"""
|
||||
order = np.argsort(S[:, 8])[::-1]
|
||||
keep = []
|
||||
while order.size > 0:
|
||||
i = order[0]
|
||||
keep.append(i)
|
||||
ovr = np.array([intersection(S[i], S[t]) for t in order[1:]])
|
||||
|
||||
inds = np.where(ovr <= thres)[0]
|
||||
order = order[inds + 1]
|
||||
|
||||
return S[keep]
|
||||
|
||||
|
||||
def standard_nms_inds(S, thres):
|
||||
"""
|
||||
Standard nms, return inds.
|
||||
"""
|
||||
order = np.argsort(S[:, 8])[::-1]
|
||||
keep = []
|
||||
while order.size > 0:
|
||||
i = order[0]
|
||||
keep.append(i)
|
||||
ovr = np.array([intersection(S[i], S[t]) for t in order[1:]])
|
||||
|
||||
inds = np.where(ovr <= thres)[0]
|
||||
order = order[inds + 1]
|
||||
|
||||
return keep
|
||||
|
||||
|
||||
def nms(S, thres):
|
||||
"""
|
||||
nms.
|
||||
"""
|
||||
order = np.argsort(S[:, 8])[::-1]
|
||||
keep = []
|
||||
while order.size > 0:
|
||||
i = order[0]
|
||||
keep.append(i)
|
||||
ovr = np.array([intersection(S[i], S[t]) for t in order[1:]])
|
||||
|
||||
inds = np.where(ovr <= thres)[0]
|
||||
order = order[inds + 1]
|
||||
|
||||
return keep
|
||||
|
||||
|
||||
def soft_nms(boxes_in, Nt_thres=0.3, threshold=0.8, sigma=0.5, method=2):
|
||||
"""
|
||||
soft_nms
|
||||
:param boxes_in: N x 9 array (8 quad coordinates + score)
|
||||
:param threshold: score below which a box is discarded
|
||||
:param Nt_thres: IoU threshold
|
||||
:param sigma: gaussian weight
|
||||
:param method: 1 linear, 2 gaussian, otherwise hard threshold
|
||||
"""
|
||||
boxes = boxes_in.copy()
|
||||
N = boxes.shape[0]
|
||||
if N is None or N < 1:
|
||||
return np.array([])
|
||||
pos, maxpos = 0, 0
|
||||
weight = 0.0
|
||||
inds = np.arange(N)
|
||||
tbox, sbox = boxes[0].copy(), boxes[0].copy()
|
||||
for i in range(N):
|
||||
maxscore = boxes[i, 8]
|
||||
maxpos = i
|
||||
tbox = boxes[i].copy()
|
||||
ti = inds[i]
|
||||
pos = i + 1
|
||||
#get max box
|
||||
while pos < N:
|
||||
if maxscore < boxes[pos, 8]:
|
||||
maxscore = boxes[pos, 8]
|
||||
maxpos = pos
|
||||
pos = pos + 1
|
||||
#add max box as a detection
|
||||
boxes[i, :] = boxes[maxpos, :]
|
||||
inds[i] = inds[maxpos]
|
||||
#swap
|
||||
boxes[maxpos, :] = tbox
|
||||
inds[maxpos] = ti
|
||||
tbox = boxes[i].copy()
|
||||
pos = i + 1
|
||||
#NMS iteration
|
||||
while pos < N:
|
||||
sbox = boxes[pos].copy()
|
||||
ts_iou_val = intersection(tbox, sbox)
|
||||
if ts_iou_val > 0:
|
||||
if method == 1:
|
||||
if ts_iou_val > Nt_thres:
|
||||
weight = 1 - ts_iou_val
|
||||
else:
|
||||
weight = 1
|
||||
elif method == 2:
|
||||
weight = np.exp(-1.0 * ts_iou_val**2 / sigma)
|
||||
else:
|
||||
if ts_iou_val > Nt_thres:
|
||||
weight = 0
|
||||
else:
|
||||
weight = 1
|
||||
boxes[pos, 8] = weight * boxes[pos, 8]
|
||||
#if the box score falls below the threshold, discard the box by
|
||||
#swapping in the last box and decrementing N
|
||||
if boxes[pos, 8] < threshold:
|
||||
boxes[pos, :] = boxes[N - 1, :]
|
||||
inds[pos] = inds[N - 1]
|
||||
N = N - 1
|
||||
pos = pos - 1
|
||||
pos = pos + 1
|
||||
|
||||
return boxes[:N]
|
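# Usage sketch (illustrative values): dets is an N x 9 float array of
# 8 quad coordinates plus a score; method=2 selects the gaussian decay.
#   kept = soft_nms(dets, Nt_thres=0.3, threshold=0.8, sigma=0.5, method=2)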
||||
|
||||
|
||||
def nms_locality(polys, thres=0.3):
|
||||
"""
|
||||
locality aware nms of EAST
|
||||
:param polys: an N*9 numpy array; the first 8 values are coordinates, the 9th the score
|
||||
:return: boxes after nms
|
||||
"""
|
||||
S = []
|
||||
p = None
|
||||
for g in polys:
|
||||
if p is not None and intersection(g, p) > thres:
|
||||
p = weighted_merge(g, p)
|
||||
else:
|
||||
if p is not None:
|
||||
S.append(p)
|
||||
p = g
|
||||
if p is not None:
|
||||
S.append(p)
|
||||
|
||||
if len(S) == 0:
|
||||
return np.array([])
|
||||
return standard_nms(np.array(S), thres)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
# 343,350,448,135,474,143,369,359
|
||||
print(
|
||||
Polygon(np.array([[343, 350], [448, 135], [474, 143], [369, 359]]))
|
||||
.area)
|
|
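A small usage sketch for the entry point above, using the module's own numpy import; the quads are hypothetical, and the first two overlap enough (IoU ≈ 0.82 > 0.3) to be weight-merged before the final standard NMS:

polys = np.array([
    [0, 0, 10, 0, 10, 10, 0, 10, 0.9],      # quad one
    [1, 0, 11, 0, 11, 10, 1, 10, 0.8],      # overlaps quad one heavily
    [50, 50, 60, 50, 60, 60, 50, 60, 0.7],  # far away, kept as-is
])
kept = nms_locality(polys, thres=0.3)  # two boxes survive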
@@ -0,0 +1,13 @@
|
|||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
|
@@ -0,0 +1,171 @@
|
|||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import numpy as np
|
||||
import string
|
||||
import re
|
||||
from .check import check_config_params
|
||||
import sys
|
||||
|
||||
|
||||
class CharacterOps(object):
|
||||
""" Convert between text-label and text-index """
|
||||
|
||||
def __init__(self, config):
|
||||
self.character_type = config['character_type']
|
||||
self.loss_type = config['loss_type']
|
||||
if self.character_type == "en":
|
||||
self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
|
||||
dict_character = list(self.character_str)
|
||||
elif self.character_type == "ch":
|
||||
character_dict_path = config['character_dict_path']
|
||||
self.character_str = ""
|
||||
with open(character_dict_path, "rb") as fin:
|
||||
lines = fin.readlines()
|
||||
for line in lines:
|
||||
line = line.decode('utf-8').strip("\n")
|
||||
self.character_str += line
|
||||
dict_character = list(self.character_str)
|
||||
elif self.character_type == "en_sensitive":
|
||||
# same with ASTER setting (use 94 char).
|
||||
self.character_str = string.printable[:-6]
|
||||
dict_character = list(self.character_str)
|
||||
else:
|
||||
self.character_str = None
|
||||
assert self.character_str is not None, \
|
||||
"Nonsupport type of the character: {}".format(self.character_str)
|
||||
self.beg_str = "sos"
|
||||
self.end_str = "eos"
|
||||
if self.loss_type == "attention":
|
||||
dict_character = [self.beg_str, self.end_str] + dict_character
|
||||
self.dict = {}
|
||||
for i, char in enumerate(dict_character):
|
||||
self.dict[char] = i
|
||||
self.character = dict_character
|
||||
|
||||
def encode(self, text):
|
||||
"""convert text-label into text-index.
|
||||
input:
|
||||
text: text labels of each image. [batch_size]
|
||||
|
||||
output:
|
||||
text: concatenated text index for CTCLoss.
|
||||
[sum(text_lengths)] = [text_index_0 + text_index_1 + ... + text_index_(n - 1)]
|
||||
length: length of each text. [batch_size]
|
||||
"""
|
||||
if self.character_type == "en":
|
||||
text = text.lower()
|
||||
|
||||
text_list = []
|
||||
for char in text:
|
||||
if char not in self.dict:
|
||||
continue
|
||||
text_list.append(self.dict[char])
|
||||
text = np.array(text_list)
|
||||
return text
|
||||
|
||||
def decode(self, text_index, is_remove_duplicate=False):
|
||||
""" convert text-index into text-label. """
|
||||
char_list = []
|
||||
char_num = self.get_char_num()
|
||||
|
||||
if self.loss_type == "attention":
|
||||
beg_idx = self.get_beg_end_flag_idx("beg")
|
||||
end_idx = self.get_beg_end_flag_idx("end")
|
||||
ignored_tokens = [beg_idx, end_idx]
|
||||
else:
|
||||
ignored_tokens = [char_num]
|
||||
|
||||
for idx in range(len(text_index)):
|
||||
if text_index[idx] in ignored_tokens:
|
||||
continue
|
||||
if is_remove_duplicate:
|
||||
if idx > 0 and text_index[idx - 1] == text_index[idx]:
|
||||
continue
|
||||
char_list.append(self.character[text_index[idx]])
|
||||
text = ''.join(char_list)
|
||||
return text
|
||||
|
||||
def get_char_num(self):
|
||||
return len(self.character)
|
||||
|
||||
def get_beg_end_flag_idx(self, beg_or_end):
|
||||
if self.loss_type == "attention":
|
||||
if beg_or_end == "beg":
|
||||
idx = np.array(self.dict[self.beg_str])
|
||||
elif beg_or_end == "end":
|
||||
idx = np.array(self.dict[self.end_str])
|
||||
else:
|
||||
assert False, "Unsupport type %s in get_beg_end_flag_idx"\
|
||||
% beg_or_end
|
||||
return idx
|
||||
else:
|
||||
err = "error in get_beg_end_flag_idx when using the loss %s"\
|
||||
% (self.loss_type)
|
||||
assert False, err
|
||||
|
||||
|
||||
def cal_predicts_accuracy(char_ops,
|
||||
preds,
|
||||
preds_lod,
|
||||
labels,
|
||||
labels_lod,
|
||||
is_remove_duplicate=False):
|
||||
acc_num = 0
|
||||
img_num = 0
|
||||
for ino in range(len(labels_lod) - 1):
|
||||
beg_no = preds_lod[ino]
|
||||
end_no = preds_lod[ino + 1]
|
||||
preds_text = preds[beg_no:end_no].reshape(-1)
|
||||
preds_text = char_ops.decode(preds_text, is_remove_duplicate)
|
||||
|
||||
beg_no = labels_lod[ino]
|
||||
end_no = labels_lod[ino + 1]
|
||||
labels_text = labels[beg_no:end_no].reshape(-1)
|
||||
labels_text = char_ops.decode(labels_text, is_remove_duplicate)
|
||||
img_num += 1
|
||||
|
||||
if preds_text == labels_text:
|
||||
acc_num += 1
|
||||
acc = acc_num * 1.0 / img_num
|
||||
return acc, acc_num, img_num
|
||||
|
||||
|
||||
def convert_rec_attention_infer_res(preds):
|
||||
img_num = preds.shape[0]
|
||||
target_lod = [0]
|
||||
convert_ids = []
|
||||
for ino in range(img_num):
|
||||
end_pos = np.where(preds[ino, :] == 1)[0]
|
||||
if len(end_pos) <= 1:
|
||||
text_list = preds[ino, 1:]
|
||||
else:
|
||||
text_list = preds[ino, 1:end_pos[1]]
|
||||
target_lod.append(target_lod[ino] + len(text_list))
|
||||
convert_ids = convert_ids + list(text_list)
|
||||
convert_ids = np.array(convert_ids)
|
||||
convert_ids = convert_ids.reshape((-1, 1))
|
||||
return convert_ids, target_lod
|
||||
|
||||
|
||||
def convert_rec_label_to_lod(ori_labels):
|
||||
img_num = len(ori_labels)
|
||||
target_lod = [0]
|
||||
convert_ids = []
|
||||
for ino in range(img_num):
|
||||
target_lod.append(target_lod[ino] + len(ori_labels[ino]))
|
||||
convert_ids = convert_ids + list(ori_labels[ino])
|
||||
convert_ids = np.array(convert_ids)
|
||||
convert_ids = convert_ids.reshape((-1, 1))
|
||||
return convert_ids, target_lod
|
|
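A short usage sketch for CharacterOps; the config dict below is a hypothetical English CTC setup, and the index values follow directly from the 36-character table above:

char_ops = CharacterOps({'character_type': 'en', 'loss_type': 'ctc'})
indices = char_ops.encode("OCR2020")  # lower-cased, then mapped:
# array([24, 12, 27, 2, 0, 2, 0])  ('o'=24, 'c'=12, 'r'=27, digits map to 0-9)
text = char_ops.decode(indices)       # 'ocr2020'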
@@ -0,0 +1,33 @@
|
|||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import sys
|
||||
|
||||
import paddle.fluid as fluid
|
||||
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def check_config_params(config, config_name, params):
|
||||
for param in params:
|
||||
if param not in config:
|
||||
err = "param %s didn't find in %s!" % (param, config_name)
|
||||
assert False, err
|
||||
return
|
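A brief usage sketch; the key list is illustrative and mirrors the required Global parameters checked at startup:

global_config = {'use_gpu': True, 'image_shape': [3, 32, 100]}
check_config_params(global_config, 'Global', ['use_gpu', 'image_shape'])
# raises an AssertionError naming the first missing key, otherwise returns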
File diff suppressed because it is too large
|
@@ -0,0 +1,131 @@
|
|||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
#Licensed under the Apache License, Version 2.0 (the "License");
|
||||
#you may not use this file except in compliance with the License.
|
||||
#You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
#Unless required by applicable law or agreed to in writing, software
|
||||
#distributed under the License is distributed on an "AS IS" BASIS,
|
||||
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
#See the License for the specific language governing permissions and
|
||||
#limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import errno
|
||||
import os
|
||||
import shutil
|
||||
import tempfile
|
||||
|
||||
import paddle
|
||||
import paddle.fluid as fluid
|
||||
|
||||
from .utility import initial_logger
|
||||
import re
|
||||
logger = initial_logger()
|
||||
|
||||
|
||||
def _mkdir_if_not_exist(path):
|
||||
"""
|
||||
create the directory if it does not exist; ignore the race when multiple processes create it simultaneously
|
||||
"""
|
||||
if not os.path.exists(path):
|
||||
try:
|
||||
os.makedirs(path)
|
||||
except OSError as e:
|
||||
if e.errno == errno.EEXIST and os.path.isdir(path):
|
||||
logger.warning(
|
||||
'{} has already been created by another process'.format(
|
||||
path))
|
||||
else:
|
||||
raise OSError('Failed to mkdir {}'.format(path))
|
||||
|
||||
|
||||
def _load_state(path):
|
||||
if os.path.exists(path + '.pdopt'):
|
||||
# XXX another hack to ignore the optimizer state
|
||||
tmp = tempfile.mkdtemp()
|
||||
dst = os.path.join(tmp, os.path.basename(os.path.normpath(path)))
|
||||
shutil.copy(path + '.pdparams', dst + '.pdparams')
|
||||
state = fluid.io.load_program_state(dst)
|
||||
shutil.rmtree(tmp)
|
||||
else:
|
||||
state = fluid.io.load_program_state(path)
|
||||
return state
|
||||
|
||||
|
||||
def load_params(exe, prog, path, ignore_params=[]):
|
||||
"""
|
||||
Load model from the given path.
|
||||
Args:
|
||||
exe (fluid.Executor): The fluid.Executor object.
|
||||
prog (fluid.Program): load weight to which Program object.
|
||||
path (string): URL string or local model path.
|
||||
ignore_params (list): ignore variable to load when finetuning.
|
||||
It can be specified via finetune_exclude_pretrained_params;
|
||||
see docs/advanced_tutorials/TRANSFER_LEARNING.md for usage.
|
||||
"""
|
||||
if not (os.path.isdir(path) or os.path.exists(path + '.pdparams')):
|
||||
raise ValueError("Model pretrain path {} does not "
|
||||
"exists.".format(path))
|
||||
|
||||
logger.info('Loading parameters from {}...'.format(path))
|
||||
|
||||
ignore_set = set()
|
||||
state = _load_state(path)
|
||||
|
||||
# ignore the parameter which mismatch the shape
|
||||
# between the model and pretrain weight.
|
||||
all_var_shape = {}
|
||||
for block in prog.blocks:
|
||||
for param in block.all_parameters():
|
||||
all_var_shape[param.name] = param.shape
|
||||
ignore_set.update([
|
||||
name for name, shape in all_var_shape.items()
|
||||
if name in state and shape != state[name].shape
|
||||
])
|
||||
|
||||
if ignore_params:
|
||||
all_var_names = [var.name for var in prog.list_vars()]
|
||||
ignore_list = filter(
|
||||
lambda var: any([re.match(name, var) for name in ignore_params]),
|
||||
all_var_names)
|
||||
ignore_set.update(list(ignore_list))
|
||||
|
||||
if len(ignore_set) > 0:
|
||||
for k in ignore_set:
|
||||
if k in state:
|
||||
logger.warning('variable {} is not loaded'.format(k))
|
||||
del state[k]
|
||||
fluid.io.set_program_state(prog, state)
|
||||
|
||||
|
||||
def init_model(config, program, exe):
|
||||
"""
|
||||
load model from checkpoint or pretrained_model
|
||||
"""
|
||||
checkpoints = config['Global'].get('checkpoints')
|
||||
if checkpoints:
|
||||
path = checkpoints
|
||||
fluid.load(program, path, exe)
|
||||
logger.info("Finish initing model from {}".format(path))
|
||||
return
|
||||
|
||||
pretrain_weights = config['Global'].get('pretrain_weights')
|
||||
if pretrain_weights:
|
||||
path = pretrain_weights
|
||||
load_params(exe, program, path)
|
||||
logger.info("Finish initing model from {}".format(path))
|
||||
return
|
||||
|
||||
|
||||
def save_model(program, model_path):
|
||||
"""
|
||||
save model to the target path
|
||||
"""
|
||||
fluid.save(program, model_path)
|
||||
logger.info("Already save model in {}".format(model_path))
|
|
@@ -0,0 +1,65 @@
|
|||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import collections
|
||||
import numpy as np
|
||||
import datetime
|
||||
|
||||
__all__ = ['TrainingStats', 'Time']
|
||||
|
||||
|
||||
class SmoothedValue(object):
|
||||
"""Track a series of values and provide access to smoothed values over a
|
||||
window or the global series average.
|
||||
"""
|
||||
|
||||
def __init__(self, window_size):
|
||||
self.deque = collections.deque(maxlen=window_size)
|
||||
|
||||
def add_value(self, value):
|
||||
self.deque.append(value)
|
||||
|
||||
def get_median_value(self):
|
||||
return np.median(self.deque)
|
||||
|
||||
|
||||
def Time():
|
||||
return datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')
|
||||
|
||||
|
||||
class TrainingStats(object):
|
||||
def __init__(self, window_size, stats_keys):
|
||||
self.smoothed_losses_and_metrics = {
|
||||
key: SmoothedValue(window_size)
|
||||
for key in stats_keys
|
||||
}
|
||||
|
||||
def update(self, stats):
|
||||
for k, v in self.smoothed_losses_and_metrics.items():
|
||||
v.add_value(stats[k])
|
||||
|
||||
def get(self, extras=None):
|
||||
stats = collections.OrderedDict()
|
||||
if extras:
|
||||
for k, v in extras.items():
|
||||
stats[k] = v
|
||||
for k, v in self.smoothed_losses_and_metrics.items():
|
||||
stats[k] = round(v.get_median_value(), 6)
|
||||
|
||||
return stats
|
||||
|
||||
def log(self, extras=None):
|
||||
d = self.get(extras)
|
||||
strs = ', '.join(str(dict({x: y})).strip('{}') for x, y in d.items())
|
||||
return strs
|
|
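A usage sketch for the stats helpers above; window size, keys and values are illustrative:

stats = TrainingStats(window_size=20, stats_keys=['loss', 'acc'])
stats.update({'loss': 0.52, 'acc': 0.91})   # called once per iteration
print(Time(), stats.log({'lr': 0.001}))
# e.g. 2020-05-10 12:00:00.000000 'lr': 0.001, 'loss': 0.52, 'acc': 0.91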
@@ -0,0 +1,71 @@
|
|||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import logging
|
||||
|
||||
|
||||
def initial_logger():
|
||||
FORMAT = '%(asctime)s-%(levelname)s: %(message)s'
|
||||
logging.basicConfig(level=logging.INFO, format=FORMAT)
|
||||
logger = logging.getLogger(__name__)
|
||||
return logger
|
||||
|
||||
|
||||
import importlib
|
||||
|
||||
|
||||
def create_module(module_str):
|
||||
tmpss = module_str.split(",")
|
||||
assert len(tmpss) == 2, "Invalid format\
|
||||
of the module path: {}".format(module_str)
|
||||
module_name, function_name = tmpss[0], tmpss[1]
|
||||
somemodule = importlib.import_module(module_name, __package__)
|
||||
function = getattr(somemodule, function_name)
|
||||
return function
|
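# Usage sketch: the "module,function" strings in the YAML configs resolve to
# callables at runtime, e.g.
#   DBPostProcess = create_module('ppocr.postprocess.db_postprocess,DBPostProcess')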
||||
|
||||
|
||||
def get_check_global_params(mode):
|
||||
check_params = ['use_gpu', 'max_text_length', 'image_shape',\
|
||||
'character_type', 'loss_type']
|
||||
if mode == "train_eval":
|
||||
check_params = check_params + [\
|
||||
'train_batch_size_per_card', 'test_batch_size_per_card']
|
||||
elif mode == "test":
|
||||
check_params = check_params + ['test_batch_size_per_card']
|
||||
return check_params
|
||||
|
||||
|
||||
def get_check_reader_params(mode):
|
||||
check_params = []
|
||||
if mode == "train_eval":
|
||||
check_params = ['TrainReader', 'EvalReader']
|
||||
elif mode == "test":
|
||||
check_params = ['TestReader']
|
||||
return check_params
|
||||
|
||||
|
||||
from paddle import fluid
|
||||
|
||||
|
||||
def create_multi_devices_program(program, loss_var_name):
|
||||
build_strategy = fluid.BuildStrategy()
|
||||
build_strategy.memory_optimize = False
|
||||
build_strategy.enable_inplace = True
|
||||
exec_strategy = fluid.ExecutionStrategy()
|
||||
exec_strategy.num_iteration_per_drop_scope = 1
|
||||
compile_program = fluid.CompiledProgram(program).with_data_parallel(
|
||||
loss_name=loss_var_name,
|
||||
build_strategy=build_strategy,
|
||||
exec_strategy=exec_strategy)
|
||||
return compile_program
|
|
@@ -0,0 +1,102 @@
|
|||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
|
||||
|
||||
def set_paddle_flags(**kwargs):
|
||||
for key, value in kwargs.items():
|
||||
if os.environ.get(key, None) is None:
|
||||
os.environ[key] = str(value)
|
||||
|
||||
|
||||
# NOTE(paddle-dev): All of these flags should be
|
||||
# set before `import paddle`. Otherwise, it would
|
||||
# not take any effect.
|
||||
set_paddle_flags(
|
||||
FLAGS_eager_delete_tensor_gb=0, # enable GC to save memory
|
||||
)
|
||||
|
||||
import program
|
||||
from paddle import fluid
|
||||
from ppocr.utils.utility import initial_logger
|
||||
logger = initial_logger()
|
||||
from ppocr.data.reader_main import reader_main
|
||||
from ppocr.utils.save_load import init_model
|
||||
from eval_utils.eval_det_utils import eval_det_run
|
||||
from eval_utils.eval_rec_utils import test_rec_benchmark
|
||||
from eval_utils.eval_rec_utils import eval_rec_run
|
||||
from ppocr.utils.character import CharacterOps
|
||||
|
||||
|
||||
def main():
|
||||
config = program.load_config(FLAGS.config)
|
||||
program.merge_config(FLAGS.opt)
|
||||
logger.info(config)
|
||||
|
||||
# check that use_gpu=True is not set with a CPU-only paddlepaddle build
|
||||
use_gpu = config['Global']['use_gpu']
|
||||
program.check_gpu(use_gpu)
|
||||
|
||||
alg = config['Global']['algorithm']
|
||||
assert alg in ['EAST', 'DB', 'Rosetta', 'CRNN', 'STARNet', 'RARE']
|
||||
if alg in ['Rosetta', 'CRNN', 'STARNet', 'RARE']:
|
||||
config['Global']['char_ops'] = CharacterOps(config['Global'])
|
||||
|
||||
place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
|
||||
startup_prog = fluid.Program()
|
||||
eval_program = fluid.Program()
|
||||
eval_build_outputs = program.build(
|
||||
config, eval_program, startup_prog, mode='test')
|
||||
eval_fetch_name_list = eval_build_outputs[1]
|
||||
eval_fetch_varname_list = eval_build_outputs[2]
|
||||
eval_program = eval_program.clone(for_test=True)
|
||||
exe = fluid.Executor(place)
|
||||
exe.run(startup_prog)
|
||||
|
||||
init_model(config, eval_program, exe)
|
||||
|
||||
if alg in ['EAST', 'DB']:
|
||||
eval_reader = reader_main(config=config, mode="test")
|
||||
eval_info_dict = {'program':eval_program,\
|
||||
'reader':eval_reader,\
|
||||
'fetch_name_list':eval_fetch_name_list,\
|
||||
'fetch_varname_list':eval_fetch_varname_list}
|
||||
metrics = eval_det_run(exe, config, eval_info_dict, "test")
|
||||
else:
|
||||
dataset = config['Global']['dataset']
|
||||
assert dataset in ['lmdb', 'common']
|
||||
if dataset == 'common':
|
||||
eval_reader = reader_main(config=config, mode="eval")
|
||||
eval_info_dict = {'program': eval_program, \
|
||||
'reader': eval_reader, \
|
||||
'fetch_name_list': eval_fetch_name_list, \
|
||||
'fetch_varname_list': eval_fetch_varname_list}
|
||||
metrics = eval_rec_run(exe, config, eval_info_dict, "eval")
|
||||
print("Eval result:", metrics)
|
||||
else:
|
||||
eval_info_dict = {'program':eval_program,\
|
||||
'fetch_name_list':eval_fetch_name_list,\
|
||||
'fetch_varname_list':eval_fetch_varname_list}
|
||||
test_rec_benchmark(exe, config, eval_info_dict)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
parser = program.ArgsParser()
|
||||
FLAGS = parser.parse_args()
|
||||
main()
|
|
@@ -0,0 +1,13 @@
|
|||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
|
@@ -0,0 +1,231 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
from collections import namedtuple
|
||||
import numpy as np
|
||||
from shapely.geometry import Polygon
|
||||
|
||||
|
||||
class DetectionIoUEvaluator(object):
|
||||
def __init__(self, iou_constraint=0.5, area_precision_constraint=0.5):
|
||||
self.iou_constraint = iou_constraint
|
||||
self.area_precision_constraint = area_precision_constraint
|
||||
|
||||
def evaluate_image(self, gt, pred):
|
||||
def get_union(pD, pG):
|
||||
return Polygon(pD).union(Polygon(pG)).area
|
||||
|
||||
def get_intersection_over_union(pD, pG):
|
||||
return get_intersection(pD, pG) / get_union(pD, pG)
|
||||
|
||||
def get_intersection(pD, pG):
|
||||
return Polygon(pD).intersection(Polygon(pG)).area
|
||||
|
||||
def compute_ap(confList, matchList, numGtCare):
|
||||
correct = 0
|
||||
AP = 0
|
||||
if len(confList) > 0:
|
||||
confList = np.array(confList)
|
||||
matchList = np.array(matchList)
|
||||
sorted_ind = np.argsort(-confList)
|
||||
confList = confList[sorted_ind]
|
||||
matchList = matchList[sorted_ind]
|
||||
for n in range(len(confList)):
|
||||
match = matchList[n]
|
||||
if match:
|
||||
correct += 1
|
||||
AP += float(correct) / (n + 1)
|
||||
|
||||
if numGtCare > 0:
|
||||
AP /= numGtCare
|
||||
|
||||
return AP
|
||||
|
||||
perSampleMetrics = {}
|
||||
|
||||
matchedSum = 0
|
||||
|
||||
Rectangle = namedtuple('Rectangle', 'xmin ymin xmax ymax')
|
||||
|
||||
numGlobalCareGt = 0
|
||||
numGlobalCareDet = 0
|
||||
|
||||
arrGlobalConfidences = []
|
||||
arrGlobalMatches = []
|
||||
|
||||
recall = 0
|
||||
precision = 0
|
||||
hmean = 0
|
||||
|
||||
detMatched = 0
|
||||
|
||||
iouMat = np.empty([1, 1])
|
||||
|
||||
gtPols = []
|
||||
detPols = []
|
||||
|
||||
gtPolPoints = []
|
||||
detPolPoints = []
|
||||
|
||||
# Array of Ground Truth Polygons' keys marked as don't Care
|
||||
gtDontCarePolsNum = []
|
||||
# Array of Detected Polygons' matched with a don't Care GT
|
||||
detDontCarePolsNum = []
|
||||
|
||||
pairs = []
|
||||
detMatchedNums = []
|
||||
|
||||
arrSampleConfidences = []
|
||||
arrSampleMatch = []
|
||||
|
||||
evaluationLog = ""
|
||||
|
||||
# print(len(gt))
|
||||
for n in range(len(gt)):
|
||||
points = gt[n]['points']
|
||||
# transcription = gt[n]['text']
|
||||
dontCare = gt[n]['ignore']
|
||||
points = Polygon(points)
|
||||
points = points.buffer(0)
|
||||
if not points.is_valid or not points.is_simple:
|
||||
continue
|
||||
|
||||
gtPol = points
|
||||
gtPols.append(gtPol)
|
||||
gtPolPoints.append(points)
|
||||
if dontCare:
|
||||
gtDontCarePolsNum.append(len(gtPols) - 1)
|
||||
|
||||
evaluationLog += "GT polygons: " + str(len(gtPols)) + (
|
||||
" (" + str(len(gtDontCarePolsNum)) + " don't care)\n"
|
||||
if len(gtDontCarePolsNum) > 0 else "\n")
|
||||
|
||||
for n in range(len(pred)):
|
||||
points = pred[n]['points']
|
||||
points = Polygon(points)
|
||||
points = points.buffer(0)
|
||||
if not points.is_valid or not points.is_simple:
|
||||
continue
|
||||
|
||||
detPol = points
|
||||
detPols.append(detPol)
|
||||
detPolPoints.append(points)
|
||||
if len(gtDontCarePolsNum) > 0:
|
||||
for dontCarePol in gtDontCarePolsNum:
|
||||
dontCarePol = gtPols[dontCarePol]
|
||||
intersected_area = get_intersection(dontCarePol, detPol)
|
||||
pdDimensions = Polygon(detPol).area
|
||||
precision = 0 if pdDimensions == 0 else intersected_area / pdDimensions
|
||||
if (precision > self.area_precision_constraint):
|
||||
detDontCarePolsNum.append(len(detPols) - 1)
|
||||
break
|
||||
|
||||
evaluationLog += "DET polygons: " + str(len(detPols)) + (
|
||||
" (" + str(len(detDontCarePolsNum)) + " don't care)\n"
|
||||
if len(detDontCarePolsNum) > 0 else "\n")
|
||||
|
||||
if len(gtPols) > 0 and len(detPols) > 0:
|
||||
# Calculate the IoU and precision matrices
|
||||
outputShape = [len(gtPols), len(detPols)]
|
||||
iouMat = np.empty(outputShape)
|
||||
gtRectMat = np.zeros(len(gtPols), np.int8)
|
||||
detRectMat = np.zeros(len(detPols), np.int8)
|
||||
for gtNum in range(len(gtPols)):
|
||||
for detNum in range(len(detPols)):
|
||||
pG = gtPols[gtNum]
|
||||
pD = detPols[detNum]
|
||||
iouMat[gtNum, detNum] = get_intersection_over_union(pD, pG)
|
||||
|
||||
for gtNum in range(len(gtPols)):
|
||||
for detNum in range(len(detPols)):
|
||||
if gtRectMat[gtNum] == 0 and detRectMat[
|
||||
detNum] == 0 and gtNum not in gtDontCarePolsNum and detNum not in detDontCarePolsNum:
|
||||
if iouMat[gtNum, detNum] > self.iou_constraint:
|
||||
gtRectMat[gtNum] = 1
|
||||
detRectMat[detNum] = 1
|
||||
detMatched += 1
|
||||
pairs.append({'gt': gtNum, 'det': detNum})
|
||||
detMatchedNums.append(detNum)
|
||||
evaluationLog += "Match GT #" + \
|
||||
str(gtNum) + " with Det #" + str(detNum) + "\n"
|
||||
|
||||
numGtCare = (len(gtPols) - len(gtDontCarePolsNum))
|
||||
numDetCare = (len(detPols) - len(detDontCarePolsNum))
|
||||
if numGtCare == 0:
|
||||
recall = float(1)
|
||||
precision = float(0) if numDetCare > 0 else float(1)
|
||||
else:
|
||||
recall = float(detMatched) / numGtCare
|
||||
precision = 0 if numDetCare == 0 else float(detMatched) / numDetCare
|
||||
|
||||
hmean = 0 if (precision + recall) == 0 else 2.0 * \
|
||||
precision * recall / (precision + recall)
|
||||
|
||||
matchedSum += detMatched
|
||||
numGlobalCareGt += numGtCare
|
||||
numGlobalCareDet += numDetCare
|
||||
|
||||
perSampleMetrics = {
|
||||
'precision': precision,
|
||||
'recall': recall,
|
||||
'hmean': hmean,
|
||||
'pairs': pairs,
|
||||
'iouMat': [] if len(detPols) > 100 else iouMat.tolist(),
|
||||
'gtPolPoints': gtPolPoints,
|
||||
'detPolPoints': detPolPoints,
|
||||
'gtCare': numGtCare,
|
||||
'detCare': numDetCare,
|
||||
'gtDontCare': gtDontCarePolsNum,
|
||||
'detDontCare': detDontCarePolsNum,
|
||||
'detMatched': detMatched,
|
||||
'evaluationLog': evaluationLog
|
||||
}
|
||||
|
||||
return perSampleMetrics
|
||||
|
||||
def combine_results(self, results):
|
||||
numGlobalCareGt = 0
|
||||
numGlobalCareDet = 0
|
||||
matchedSum = 0
|
||||
for result in results:
|
||||
numGlobalCareGt += result['gtCare']
|
||||
numGlobalCareDet += result['detCare']
|
||||
matchedSum += result['detMatched']
|
||||
|
||||
methodRecall = 0 if numGlobalCareGt == 0 else float(
|
||||
matchedSum) / numGlobalCareGt
|
||||
methodPrecision = 0 if numGlobalCareDet == 0 else float(
|
||||
matchedSum) / numGlobalCareDet
|
||||
methodHmean = 0 if methodRecall + methodPrecision == 0 else 2 * \
|
||||
methodRecall * methodPrecision / (methodRecall + methodPrecision)
|
||||
# print(methodRecall, methodPrecision, methodHmean)
|
||||
# sys.exit(-1)
|
||||
methodMetrics = {
|
||||
'precision': methodPrecision,
|
||||
'recall': methodRecall,
|
||||
'hmean': methodHmean
|
||||
}
|
||||
|
||||
return methodMetrics
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
evaluator = DetectionIoUEvaluator()
|
||||
gts = [[{
|
||||
'points': [(0, 0), (1, 0), (1, 1), (0, 1)],
|
||||
'text': 1234,
|
||||
'ignore': False,
|
||||
}, {
|
||||
'points': [(2, 2), (3, 2), (3, 3), (2, 3)],
|
||||
'text': 5678,
|
||||
'ignore': False,
|
||||
}]]
|
||||
preds = [[{
|
||||
'points': [(0.1, 0.1), (1, 0), (1, 1), (0, 1)],
|
||||
'text': 123,
|
||||
'ignore': False,
|
||||
}]]
|
||||
results = []
|
||||
for gt, pred in zip(gts, preds):
|
||||
results.append(evaluator.evaluate_image(gt, pred))
|
||||
metrics = evaluator.combine_results(results)
|
||||
print(metrics)
|
|
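Note that combine_results pools the raw counts over all images before dividing, i.e. a micro-average: recall = matched / gt_care, precision = matched / det_care, hmean = 2PR / (P + R). In the demo above the single prediction matches one of the two ground-truth boxes (IoU = 0.9 / 1.0 = 0.9), so the script should report precision 1.0, recall 0.5 and hmean ≈ 0.667.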
@@ -0,0 +1,131 @@
|
|||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import logging
|
||||
import numpy as np
|
||||
|
||||
import paddle.fluid as fluid
|
||||
|
||||
__all__ = ['eval_det_run']
|
||||
|
||||
FORMAT = '%(asctime)s-%(levelname)s: %(message)s'
|
||||
logging.basicConfig(level=logging.INFO, format=FORMAT)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from ppocr.utils.utility import create_module
|
||||
from .eval_det_iou import DetectionIoUEvaluator
|
||||
import json
|
||||
from copy import deepcopy
|
||||
import cv2
|
||||
from ppocr.data.reader_main import reader_main
|
||||
|
||||
|
||||
def cal_det_res(exe, config, eval_info_dict):
|
||||
global_params = config['Global']
|
||||
save_res_path = global_params['save_res_path']
|
||||
postprocess_params = deepcopy(config["PostProcess"])
|
||||
postprocess_params.update(global_params)
|
||||
postprocess = create_module(postprocess_params['function']) \
|
||||
(params=postprocess_params)
|
||||
with open(save_res_path, "wb") as fout:
|
||||
tackling_num = 0
|
||||
for data in eval_info_dict['reader']():
|
||||
img_num = len(data)
|
||||
tackling_num = tackling_num + img_num
|
||||
logger.info("test tackling num:%d", tackling_num)
|
||||
img_list = []
|
||||
ratio_list = []
|
||||
img_name_list = []
|
||||
for ino in range(img_num):
|
||||
img_list.append(data[ino][0])
|
||||
ratio_list.append(data[ino][1])
|
||||
img_name_list.append(data[ino][2])
|
||||
img_list = np.concatenate(img_list, axis=0)
|
||||
outs = exe.run(eval_info_dict['program'], \
|
||||
feed={'image': img_list}, \
|
||||
fetch_list=eval_info_dict['fetch_varname_list'])
|
||||
outs_dict = {}
|
||||
for tno in range(len(outs)):
|
||||
fetch_name = eval_info_dict['fetch_name_list'][tno]
|
||||
fetch_value = np.array(outs[tno])
|
||||
outs_dict[fetch_name] = fetch_value
|
||||
dt_boxes_list = postprocess(outs_dict, ratio_list)
|
||||
for ino in range(img_num):
|
||||
dt_boxes = dt_boxes_list[ino]
|
||||
img_name = img_name_list[ino]
|
||||
dt_boxes_json = []
|
||||
for box in dt_boxes:
|
||||
tmp_json = {"transcription": ""}
|
||||
tmp_json['points'] = box.tolist()
|
||||
dt_boxes_json.append(tmp_json)
|
||||
otstr = img_name + "\t" + json.dumps(dt_boxes_json) + "\n"
|
||||
fout.write(otstr.encode())
|
||||
return
|
||||
|
||||
|
||||
def load_label_infor(label_file_path, do_ignore=False):
|
||||
img_name_label_dict = {}
|
||||
with open(label_file_path, "rb") as fin:
|
||||
lines = fin.readlines()
|
||||
for line in lines:
|
||||
substr = line.decode().strip("\n").split("\t")
|
||||
bbox_infor = json.loads(substr[1])
|
||||
bbox_num = len(bbox_infor)
|
||||
for bno in range(bbox_num):
|
||||
text = bbox_infor[bno]['transcription']
|
||||
ignore = False
|
||||
if text == "###" and do_ignore:
|
||||
ignore = True
|
||||
bbox_infor[bno]['ignore'] = ignore
|
||||
img_name_label_dict[substr[0]] = bbox_infor
|
||||
return img_name_label_dict
|
||||
|
||||
|
||||
def cal_det_metrics(gt_label_path, save_res_path):
|
||||
evaluator = DetectionIoUEvaluator()
|
||||
gt_label_infor = load_label_infor(gt_label_path, do_ignore=True)
|
||||
dt_label_infor = load_label_infor(save_res_path)
|
||||
results = []
|
||||
for img_name in gt_label_infor:
|
||||
gt_label = gt_label_infor[img_name]
|
||||
if img_name not in dt_label_infor:
|
||||
dt_label = []
|
||||
else:
|
||||
dt_label = dt_label_infor[img_name]
|
||||
result = evaluator.evaluate_image(gt_label, dt_label)
|
||||
results.append(result)
|
||||
methodMetrics = evaluator.combine_results(results)
|
||||
return methodMetrics
|
||||
|
||||
|
||||
def eval_det_run(exe, config, eval_info_dict, mode):
|
||||
cal_det_res(exe, config, eval_info_dict)
|
||||
|
||||
save_res_path = config['Global']['save_res_path']
|
||||
if mode == "eval":
|
||||
gt_label_path = config['EvalReader']['label_file_path']
|
||||
metrics = cal_det_metrics(gt_label_path, save_res_path)
|
||||
else:
|
||||
gt_label_path = config['TestReader']['label_file_path']
|
||||
do_eval = config['TestReader']['do_eval']
|
||||
if do_eval:
|
||||
metrics = cal_det_metrics(gt_label_path, save_res_path)
|
||||
else:
|
||||
metrics = {}
|
||||
return metrics
|
|
@@ -0,0 +1,111 @@
|
|||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import logging
|
||||
import numpy as np
|
||||
|
||||
import paddle.fluid as fluid
|
||||
|
||||
__all__ = ['eval_rec_run', 'test_rec_benchmark']
|
||||
|
||||
|
||||
FORMAT = '%(asctime)s-%(levelname)s: %(message)s'
|
||||
logging.basicConfig(level=logging.INFO, format=FORMAT)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from ppocr.utils.character import cal_predicts_accuracy
|
||||
from ppocr.utils.character import convert_rec_label_to_lod
|
||||
from ppocr.utils.character import convert_rec_attention_infer_res
|
||||
from ppocr.utils.utility import create_module
|
||||
import json
|
||||
from copy import deepcopy
|
||||
import cv2
|
||||
from ppocr.data.reader_main import reader_main
|
||||
|
||||
|
||||
def eval_rec_run(exe, config, eval_info_dict, mode):
|
||||
"""
|
||||
Run evaluation program, return program outputs.
|
||||
"""
|
||||
char_ops = config['Global']['char_ops']
|
||||
total_loss = 0
|
||||
total_sample_num = 0
|
||||
total_acc_num = 0
|
||||
total_batch_num = 0
|
||||
if mode == "eval":
|
||||
is_remove_duplicate = False
|
||||
else:
|
||||
is_remove_duplicate = True
|
||||
|
||||
for data in eval_info_dict['reader']():
|
||||
img_num = len(data)
|
||||
img_list = []
|
||||
label_list = []
|
||||
for ino in range(img_num):
|
||||
img_list.append(data[ino][0])
|
||||
label_list.append(data[ino][1])
|
||||
img_list = np.concatenate(img_list, axis=0)
|
||||
outs = exe.run(eval_info_dict['program'], \
|
||||
feed={'image': img_list}, \
|
||||
fetch_list=eval_info_dict['fetch_varname_list'], \
|
||||
return_numpy=False)
|
||||
preds = np.array(outs[0])
|
||||
if preds.shape[1] != 1:
|
||||
preds, preds_lod = convert_rec_attention_infer_res(preds)
|
||||
else:
|
||||
preds_lod = outs[0].lod()[0]
|
||||
labels, labels_lod = convert_rec_label_to_lod(label_list)
|
||||
acc, acc_num, sample_num = cal_predicts_accuracy(
|
||||
char_ops, preds, preds_lod, labels, labels_lod, is_remove_duplicate)
|
||||
total_acc_num += acc_num
|
||||
total_sample_num += sample_num
|
||||
total_batch_num += 1
|
||||
avg_acc = total_acc_num * 1.0 / total_sample_num
|
||||
metrics = {'avg_acc': avg_acc, "total_acc_num": total_acc_num, \
|
||||
"total_sample_num": total_sample_num}
|
||||
return metrics
|
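# Note: a Paddle LoD vector stores cumulative offsets, e.g. preds_lod = [0, 3, 7]
# means sample 0 owns flattened indices 0:3 and sample 1 owns 3:7;
# cal_predicts_accuracy slices predictions and labels with these offsets.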
||||
|
||||
|
||||
def test_rec_benchmark(exe, config, eval_info_dict):
|
||||
" 评估lmdb 数据"
|
||||
eval_data_list = ['IIIT5k_3000', 'SVT', 'IC03_860', 'IC03_867', \
|
||||
'IC13_857', 'IC13_1015', 'IC15_1811', 'IC15_2077', 'SVTP', 'CUTE80']
|
||||
eval_data_dir = config['TestReader']['lmdb_sets_dir']
|
||||
total_evaluation_data_number = 0
|
||||
total_correct_number = 0
|
||||
eval_data_acc_info = {}
|
||||
for eval_data in eval_data_list:
|
||||
config['TestReader']['lmdb_sets_dir'] = \
|
||||
eval_data_dir + "/" + eval_data
|
||||
eval_reader = reader_main(config=config, mode="test")
|
||||
eval_info_dict['reader'] = eval_reader
|
||||
metrics = eval_rec_run(exe, config, eval_info_dict, "test")
|
||||
total_evaluation_data_number += metrics['total_sample_num']
|
||||
total_correct_number += metrics['total_acc_num']
|
||||
eval_data_acc_info[eval_data] = metrics
|
||||
|
||||
avg_acc = total_correct_number * 1.0 / total_evaluation_data_number
|
||||
logger.info('-' * 50)
|
||||
strs = ""
|
||||
for eval_data in eval_data_list:
|
||||
eval_acc = eval_data_acc_info[eval_data]['avg_acc']
|
||||
strs += "\n {}, accuracy:{:.6f}".format(eval_data, eval_acc)
|
||||
strs += "\n average, accuracy:{:.6f}".format(avg_acc)
|
||||
logger.info(strs)
|
||||
logger.info('-' * 50)
|
|
@@ -0,0 +1,88 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import sys
import time
import multiprocessing
import numpy as np


def set_paddle_flags(**kwargs):
    for key, value in kwargs.items():
        if os.environ.get(key, None) is None:
            os.environ[key] = str(value)


# NOTE(paddle-dev): All of these flags should be
# set before `import paddle`. Otherwise, it would
# not take any effect.
set_paddle_flags(
    FLAGS_eager_delete_tensor_gb=0,  # enable GC to save memory
)

import program
from paddle import fluid
from ppocr.utils.utility import initial_logger
logger = initial_logger()
from ppocr.utils.save_load import init_model
from ppocr.utils.character import CharacterOps
from ppocr.utils.utility import create_module


def main():
    config = program.load_config(FLAGS.config)
    program.merge_config(FLAGS.opt)
    logger.info(config)

    # check if set use_gpu=True in paddlepaddle cpu version
    use_gpu = config['Global']['use_gpu']
    program.check_gpu(use_gpu)

    alg = config['Global']['algorithm']
    assert alg in ['EAST', 'DB', 'Rosetta', 'CRNN', 'STARNet', 'RARE']
    if alg in ['Rosetta', 'CRNN', 'STARNet', 'RARE']:
        config['Global']['char_ops'] = CharacterOps(config['Global'])

    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    startup_prog = fluid.Program()
    eval_program = fluid.Program()

    feeded_var_names, target_vars, fetches_var_name = program.build_export(
        config, eval_program, startup_prog)
    eval_program = eval_program.clone(for_test=True)
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    init_model(config, eval_program, exe)

    fluid.io.save_inference_model(
        dirname="./output/",
        feeded_var_names=feeded_var_names,
        main_program=eval_program,
        target_vars=target_vars,
        executor=exe,
        model_filename='model',
        params_filename='params')
    print("save success, output_name_list:", fetches_var_name)


if __name__ == '__main__':
    parser = program.ArgsParser()
    FLAGS = parser.parse_args()
    main()
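The export writes ./output/model and ./output/params. A minimal sketch,
assuming the export above has already been run, for loading the saved
inference model back as a smoke test:

    from paddle import fluid

    exe = fluid.Executor(fluid.CPUPlace())
    infer_prog, feed_names, fetch_targets = fluid.io.load_inference_model(
        dirname="./output/", executor=exe,
        model_filename="model", params_filename="params")
    print(feed_names)                       # expected: ['image']
    print([t.name for t in fetch_targets])  # the exported target variables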
@ -0,0 +1 @@
<paddle.fluid.core_avx.ProgramDesc object at 0x10d15fab0>
@ -0,0 +1,169 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import utility
from ppocr.utils.utility import initial_logger
logger = initial_logger()
import cv2
from ppocr.data.det.east_process import EASTProcessTest
from ppocr.data.det.db_process import DBProcessTest
from ppocr.postprocess.db_postprocess import DBPostProcess
from ppocr.postprocess.east_postprocess import EASTPostPocess
import copy
import numpy as np
import math
import time
import sys


class TextDetector(object):
    def __init__(self, args):
        max_side_len = args.det_max_side_len
        self.det_algorithm = args.det_algorithm
        preprocess_params = {'max_side_len': max_side_len}
        postprocess_params = {}
        if self.det_algorithm == "DB":
            self.preprocess_op = DBProcessTest(preprocess_params)
            postprocess_params["thresh"] = args.det_db_thresh
            postprocess_params["box_thresh"] = args.det_db_box_thresh
            postprocess_params["max_candidates"] = 1000
            self.postprocess_op = DBPostProcess(postprocess_params)
        elif self.det_algorithm == "EAST":
            self.preprocess_op = EASTProcessTest(preprocess_params)
            postprocess_params["score_thresh"] = args.det_east_score_thresh
            postprocess_params["cover_thresh"] = args.det_east_cover_thresh
            postprocess_params["nms_thresh"] = args.det_east_nms_thresh
            self.postprocess_op = EASTPostPocess(postprocess_params)
        else:
            logger.info("unknown det_algorithm:{}".format(self.det_algorithm))
            sys.exit(0)

        self.predictor, self.input_tensor, self.output_tensors =\
            utility.create_predictor(args, mode="det")

    def order_points_clockwise(self, pts):
        # reference:
        # https://github.com/jrosebr1/imutils/blob/master/imutils/perspective.py
        # sort the points based on their x-coordinates
        xSorted = pts[np.argsort(pts[:, 0]), :]

        # grab the left-most and right-most points from the sorted
        # x-coordinate points
        leftMost = xSorted[:2, :]
        rightMost = xSorted[2:, :]

        # now, sort the left-most coordinates according to their
        # y-coordinates so we can grab the top-left and bottom-left
        # points, respectively
        leftMost = leftMost[np.argsort(leftMost[:, 1]), :]
        (tl, bl) = leftMost

        rightMost = rightMost[np.argsort(rightMost[:, 1]), :]
        (tr, br) = rightMost

        rect = np.array([tl, tr, br, bl], dtype="float32")
        return rect

    def expand_det_res(self, points, bbox_height, bbox_width, img_height,
                       img_width):
        if bbox_height * 1.0 / bbox_width >= 2.0:
            expand_w = bbox_width * 0.20
            expand_h = bbox_width * 0.20
        elif bbox_width * 1.0 / bbox_height >= 3.0:
            expand_w = bbox_height * 0.20
            expand_h = bbox_height * 0.20
        else:
            expand_w = bbox_height * 0.1
            expand_h = bbox_height * 0.1

        points[0, 0] = int(max((points[0, 0] - expand_w), 0))
        points[1, 0] = int(min((points[1, 0] + expand_w), img_width))
        points[3, 0] = int(max((points[3, 0] - expand_w), 0))
        points[2, 0] = int(min((points[2, 0] + expand_w), img_width))

        points[0, 1] = int(max((points[0, 1] - expand_h), 0))
        points[1, 1] = int(max((points[1, 1] - expand_h), 0))
        points[3, 1] = int(min((points[3, 1] + expand_h), img_height))
        points[2, 1] = int(min((points[2, 1] + expand_h), img_height))
        return points

    def filter_tag_det_res(self, dt_boxes, image_shape):
        img_height, img_width = image_shape[0:2]
        dt_boxes_new = []
        for box in dt_boxes:
            box = self.order_points_clockwise(box)
            left = int(np.min(box[:, 0]))
            right = int(np.max(box[:, 0]))
            top = int(np.min(box[:, 1]))
            bottom = int(np.max(box[:, 1]))
            bbox_height = bottom - top
            bbox_width = right - left
            diffh = math.fabs(box[0, 1] - box[1, 1])
            diffw = math.fabs(box[0, 0] - box[3, 0])
            rect_width = int(np.linalg.norm(box[0] - box[1]))
            rect_height = int(np.linalg.norm(box[0] - box[3]))
            if rect_width <= 10 or rect_height <= 10:
                continue
            if diffh <= 10 and diffw <= 10:
                box = self.expand_det_res(
                    copy.deepcopy(box), bbox_height, bbox_width, img_height,
                    img_width)
            dt_boxes_new.append(box)
        dt_boxes = np.array(dt_boxes_new)
        return dt_boxes

    def __call__(self, img):
        ori_im = img.copy()
        im, ratio_list = self.preprocess_op(img)
        if im is None:
            return None, 0
        im = im.copy()
        starttime = time.time()
        self.input_tensor.copy_from_cpu(im)
        self.predictor.zero_copy_run()
        outputs = []
        for output_tensor in self.output_tensors:
            output = output_tensor.copy_to_cpu()
            outputs.append(output)
        outs_dict = {}
        if self.det_algorithm == "EAST":
            outs_dict['f_score'] = outputs[0]
            outs_dict['f_geo'] = outputs[1]
        else:
            outs_dict['maps'] = [outputs[0]]
        dt_boxes_list = self.postprocess_op(outs_dict, [ratio_list])
        dt_boxes = dt_boxes_list[0]
        dt_boxes = self.filter_tag_det_res(dt_boxes, ori_im.shape)
        elapse = time.time() - starttime
        return dt_boxes, elapse


if __name__ == "__main__":
    args = utility.parse_args()
    image_file_list = utility.get_image_file_list(args.image_dir)
    text_detector = TextDetector(args)
    count = 0
    total_time = 0
    for image_file in image_file_list:
        img = cv2.imread(image_file)
        if img is None:
            logger.info("error in loading image:{}".format(image_file))
            continue
        dt_boxes, elapse = text_detector(img)
        if count > 0:
            total_time += elapse
        count += 1
        print("Predict time of %s:" % image_file, elapse)
        utility.draw_text_det_res(dt_boxes, image_file)
    if count > 1:
        # the first image is excluded as warm-up, so average over count - 1
        print("Avg Time:", total_time / (count - 1))
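order_points_clockwise is pure NumPy and never touches self, so it can be
sanity-checked without building a predictor. A minimal sketch:

    import numpy as np

    pts = np.array([[10, 5], [0, 0], [10, 0], [0, 5]], dtype="float32")
    rect = TextDetector.order_points_clockwise(None, pts)
    # rect -> [[0, 0], [10, 0], [10, 5], [0, 5]], i.e. tl, tr, br, bl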
@ -0,0 +1,76 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import utility
from ppocr.utils.utility import initial_logger
logger = initial_logger()
import cv2
import predict_system
import copy
import numpy as np
import math
import time
import json

if __name__ == "__main__":
    args = utility.parse_args()
    text_sys = predict_system.TextSystem(args)

    image_file_list = []
    label_file_path = "./eval_perform/gt_res/test_chinese_ic15_500_4pts.txt"
    img_set_path = "./eval_perform/"
    with open(label_file_path, "rb") as fin:
        lines = fin.readlines()
        for line in lines:
            substr = line.decode('utf-8').strip("\n").split("\t")
            if "lsvt" in substr[0]:
                continue
            image_file_list.append(substr[0])

    total_time_all = 0
    count = 0
    save_path = "./output/predict.txt"
    fout = open(save_path, "wb")
    for image_name in image_file_list:
        image_file = img_set_path + image_name
        img = cv2.imread(image_file)
        if img is None:
            logger.info("error in loading image:{}".format(image_file))
            continue
        count += 1
        starttime = time.time()
        dt_boxes, rec_res = text_sys(img)
        elapse = time.time() - starttime
        total_time_all += elapse
        print("Predict time of %s(%d): %.3fs" % (image_file, count, elapse))
        dt_num = len(dt_boxes)
        bbox_list = []
        for dno in range(dt_num):
            box = dt_boxes[dno]
            text, score = rec_res[dno]
            points = []
            for tno in range(len(box)):
                points.append([box[tno][0] * 1.0, box[tno][1] * 1.0])
            bbox_list.append({
                "transcription": text,
                "points": points,
                "scores": score * 1.0
            })
        otstr = image_name + "\t" + json.dumps(bbox_list) + "\n"
        fout.write(otstr.encode('utf-8'))
    avg_time = total_time_all / count
    logger.info("avg_time: {0}".format(avg_time))
    logger.info("avg_fps: {0}".format(1.0 / avg_time))
    fout.close()
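Each line of ./output/predict.txt is "<image name>\t<JSON list>", where every
JSON entry carries transcription, points and scores. A minimal sketch for
reading the results back:

    import json

    with open("./output/predict.txt", "rb") as fin:
        for line in fin:
            image_name, bbox_json = line.decode('utf-8').rstrip("\n").split("\t")
            for det in json.loads(bbox_json):
                print(image_name, det["transcription"], det["scores"])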
@ -0,0 +1,72 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import utility
from ppocr.utils.utility import initial_logger
logger = initial_logger()
import cv2
import predict_system
import copy
import numpy as np
import math
import time
import json
import os

if __name__ == "__main__":
    args = utility.parse_args()
    text_sys = predict_system.TextSystem(args)

    img_set_path = "/paddle/code/dyn/test_imgs/rctw_samples/"
    image_file_list = os.listdir(img_set_path)

    total_time_all = 0
    count = 0
    save_path = "./output/predict.txt"
    fout = open(save_path, "wb")
    for image_name in image_file_list:
        image_file = img_set_path + image_name
        img = cv2.imread(image_file)
        if img is None:
            logger.info("error in loading image:{}".format(image_file))
            continue
        count += 1
        starttime = time.time()
        dt_boxes, rec_res = text_sys(img)
        if dt_boxes is None:
            count -= 1
            continue
        elapse = time.time() - starttime
        total_time_all += elapse
        print("Predict time of %s(%d): %.3fs" % (image_file, count, elapse))
        dt_num = len(dt_boxes)
        bbox_list = []
        for dno in range(dt_num):
            box = dt_boxes[dno]
            text, score = rec_res[dno]
            points = []
            for tno in range(len(box)):
                points.append([box[tno][0] * 1.0, box[tno][1] * 1.0])
            bbox_list.append({
                "transcription": text,
                "points": points,
                "scores": score * 1.0
            })
        otstr = image_name + "\t" + json.dumps(bbox_list) + "\n"
        fout.write(otstr.encode('utf-8'))
    avg_time = total_time_all / count
    logger.info("avg_time: {0}".format(avg_time))
    logger.info("avg_fps: {0}".format(1.0 / avg_time))
    fout.close()
@ -0,0 +1,115 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import utility
from ppocr.utils.utility import initial_logger
logger = initial_logger()
import cv2

import copy
import numpy as np
import math
import time
from ppocr.utils.character import CharacterOps


class TextRecognizer(object):
    def __init__(self, args):
        self.predictor, self.input_tensor, self.output_tensors =\
            utility.create_predictor(args, mode="rec")
        image_shape = [int(v) for v in args.rec_image_shape.split(",")]
        self.rec_image_shape = image_shape
        char_ops_params = {}
        char_ops_params["character_type"] = args.rec_char_type
        char_ops_params["character_dict_path"] = args.rec_char_dict_path
        char_ops_params['loss_type'] = 'ctc'
        self.char_ops = CharacterOps(char_ops_params)

    def resize_norm_img(self, img):
        imgC, imgH, imgW = self.rec_image_shape
        h = img.shape[0]
        w = img.shape[1]
        ratio = w / float(h)
        if math.ceil(imgH * ratio) > imgW:
            resized_w = imgW
        else:
            resized_w = int(math.ceil(imgH * ratio))
        resized_image = cv2.resize(img, (resized_w, imgH))
        resized_image = resized_image.astype('float32')
        resized_image = resized_image.transpose((2, 0, 1)) / 255
        resized_image -= 0.5
        resized_image /= 0.5
        padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32)
        padding_im[:, :, 0:resized_w] = resized_image
        return padding_im
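
    # Shape arithmetic for resize_norm_img (comment-only sketch): with the
    # default rec_image_shape "3, 32, 320", a 64x400 crop has ratio 6.25;
    # ceil(32 * 6.25) = 200 <= 320, so it becomes 32x200 and is zero-padded
    # on the right to 32x320. Crops with ratio > 10 are squeezed to the full
    # 320 width instead of padded.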

    def __call__(self, img_list):
        img_num = len(img_list)
        batch_num = 15
        rec_res = []
        predict_time = 0
        for beg_img_no in range(0, img_num, batch_num):
            end_img_no = min(img_num, beg_img_no + batch_num)
            norm_img_batch = []
            for ino in range(beg_img_no, end_img_no):
                norm_img = self.resize_norm_img(img_list[ino])
                norm_img = norm_img[np.newaxis, :]
                norm_img_batch.append(norm_img)
            norm_img_batch = np.concatenate(norm_img_batch)
            norm_img_batch = norm_img_batch.copy()
            starttime = time.time()
            self.input_tensor.copy_from_cpu(norm_img_batch)
            self.predictor.zero_copy_run()
            rec_idx_batch = self.output_tensors[0].copy_to_cpu()
            rec_idx_lod = self.output_tensors[0].lod()[0]
            predict_batch = self.output_tensors[1].copy_to_cpu()
            predict_lod = self.output_tensors[1].lod()[0]
            elapse = time.time() - starttime
            predict_time += elapse
            starttime = time.time()
            for rno in range(len(rec_idx_lod) - 1):
                beg = rec_idx_lod[rno]
                end = rec_idx_lod[rno + 1]
                rec_idx_tmp = rec_idx_batch[beg:end, 0]
                preds_text = self.char_ops.decode(rec_idx_tmp)
                beg = predict_lod[rno]
                end = predict_lod[rno + 1]
                probs = predict_batch[beg:end, :]
                ind = np.argmax(probs, axis=1)
                blank = probs.shape[1]
                valid_ind = np.where(ind != (blank - 1))[0]
                score = np.mean(probs[valid_ind, ind[valid_ind]])
                rec_res.append([preds_text, score])
        return rec_res, predict_time


if __name__ == "__main__":
    args = utility.parse_args()
    image_file_list = utility.get_image_file_list(args.image_dir)
    text_recognizer = TextRecognizer(args)
    valid_image_file_list = []
    img_list = []
    for image_file in image_file_list:
        img = cv2.imread(image_file)
        if img is None:
            logger.info("error in loading image:{}".format(image_file))
            continue
        valid_image_file_list.append(image_file)
        img_list.append(img)
    # run twice: the first pass presumably warms up the predictor, and only
    # the second pass's timing is reported
    rec_res, predict_time = text_recognizer(img_list)
    rec_res, predict_time = text_recognizer(img_list)
    for ino in range(len(img_list)):
        print("Predicts of %s:%s" % (valid_image_file_list[ino], rec_res[ino]))
    print("Total predict time for %d images:%.3f" %
          (len(img_list), predict_time))
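The per-string confidence computed in __call__ is the mean of the winning
probability over non-blank timesteps, with the blank as the last column. A
small NumPy sketch of exactly that scoring step:

    import numpy as np

    probs = np.array([[0.1, 0.2, 0.7],   # blank wins here, so it is ignored
                      [0.8, 0.1, 0.1],
                      [0.1, 0.8, 0.1]])
    ind = np.argmax(probs, axis=1)             # [2, 0, 1]
    blank = probs.shape[1]
    valid_ind = np.where(ind != blank - 1)[0]  # [1, 2]
    score = np.mean(probs[valid_ind, ind[valid_ind]])  # (0.8 + 0.8) / 2 = 0.8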
@ -0,0 +1,97 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import utility
from ppocr.utils.utility import initial_logger
logger = initial_logger()
import cv2
import predict_det
import predict_rec
import copy
import numpy as np
import math
import time


class TextSystem(object):
    def __init__(self, args):
        self.text_detector = predict_det.TextDetector(args)
        self.text_recognizer = predict_rec.TextRecognizer(args)

    def get_rotate_crop_image(self, img, points):
        img_height, img_width = img.shape[0:2]
        left = int(np.min(points[:, 0]))
        right = int(np.max(points[:, 0]))
        top = int(np.min(points[:, 1]))
        bottom = int(np.max(points[:, 1]))
        img_crop = img[top:bottom, left:right, :].copy()
        points[:, 0] = points[:, 0] - left
        points[:, 1] = points[:, 1] - top
        img_crop_width = int(np.linalg.norm(points[0] - points[1]))
        img_crop_height = int(np.linalg.norm(points[0] - points[3]))
        pts_std = np.float32([[0, 0], [img_crop_width, 0],\
                              [img_crop_width, img_crop_height],
                              [0, img_crop_height]])
        M = cv2.getPerspectiveTransform(points, pts_std)
        dst_img = cv2.warpPerspective(
            img_crop,
            M, (img_crop_width, img_crop_height),
            borderMode=cv2.BORDER_REPLICATE)
        dst_img_height, dst_img_width = dst_img.shape[0:2]
        if dst_img_height * 1.0 / dst_img_width >= 1.5:
            dst_img = np.rot90(dst_img)
        return dst_img

    def print_draw_crop_rec_res(self, img_crop_list, rec_res):
        bbox_num = len(img_crop_list)
        for bno in range(bbox_num):
            cv2.imwrite("./output/img_crop_%d.jpg" % bno, img_crop_list[bno])
            print(bno, rec_res[bno])

    def __call__(self, img):
        ori_im = img.copy()
        dt_boxes, elapse = self.text_detector(img)
        if dt_boxes is None:
            return None, None
        img_crop_list = []
        for bno in range(len(dt_boxes)):
            tmp_box = copy.deepcopy(dt_boxes[bno])
            img_crop = self.get_rotate_crop_image(ori_im, tmp_box)
            img_crop_list.append(img_crop)
        rec_res, elapse = self.text_recognizer(img_crop_list)
        # self.print_draw_crop_rec_res(img_crop_list, rec_res)
        return dt_boxes, rec_res


if __name__ == "__main__":
    args = utility.parse_args()
    image_file_list = utility.get_image_file_list(args.image_dir)
    text_sys = TextSystem(args)
    for image_file in image_file_list:
        img = cv2.imread(image_file)
        if img is None:
            logger.info("error in loading image:{}".format(image_file))
            continue
        starttime = time.time()
        dt_boxes, rec_res = text_sys(img)
        elapse = time.time() - starttime
        print("Predict time of %s: %.3fs" % (image_file, elapse))
        dt_num = len(dt_boxes)
        dt_boxes_final = []
        for dno in range(dt_num):
            text, score = rec_res[dno]
            if score >= 0:
                text_str = "%s, %.3f" % (text, score)
                print(text_str)
                dt_boxes_final.append(dt_boxes[dno])
        utility.draw_text_det_res(dt_boxes_final, image_file)
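TextSystem is the end-to-end entry point: detect boxes, perspective-crop each
box, then recognize every crop. A minimal programmatic sketch (flags as
defined in utility.parse_args; the image path is a placeholder):

    import cv2
    import utility
    import predict_system

    args = utility.parse_args()  # --det_model_dir / --rec_model_dir set on the CLI
    text_sys = predict_system.TextSystem(args)
    img = cv2.imread("some_image.jpg")  # hypothetical input
    dt_boxes, rec_res = text_sys(img)   # quad boxes plus (text, score) pairs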
@ -0,0 +1,147 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import os, sys
from ppocr.utils.utility import initial_logger
logger = initial_logger()
from paddle.fluid.core import PaddleTensor
from paddle.fluid.core import AnalysisConfig
from paddle.fluid.core import create_paddle_predictor
import cv2
import numpy as np


def parse_args():
    def str2bool(v):
        return v.lower() in ("true", "t", "1")

    parser = argparse.ArgumentParser()
    # params for prediction engine
    parser.add_argument("--use_gpu", type=str2bool, default=True)
    parser.add_argument("--ir_optim", type=str2bool, default=True)
    parser.add_argument("--use_tensorrt", type=str2bool, default=False)
    parser.add_argument("--gpu_mem", type=int, default=8000)

    # params for text detector
    parser.add_argument("--image_dir", type=str)
    parser.add_argument("--det_algorithm", type=str, default='DB')
    parser.add_argument("--det_model_dir", type=str)
    parser.add_argument("--det_max_side_len", type=float, default=960)

    # DB params
    parser.add_argument("--det_db_thresh", type=float, default=0.3)
    parser.add_argument("--det_db_box_thresh", type=float, default=0.5)

    # EAST params
    parser.add_argument("--det_east_score_thresh", type=float, default=0.8)
    parser.add_argument("--det_east_cover_thresh", type=float, default=0.1)
    parser.add_argument("--det_east_nms_thresh", type=float, default=0.2)

    # params for text recognizer
    parser.add_argument("--rec_algorithm", type=str, default='CRNN')
    parser.add_argument("--rec_model_dir", type=str)
    parser.add_argument("--rec_image_shape", type=str, default="3, 32, 320")
    parser.add_argument("--rec_char_type", type=str, default='ch')
    parser.add_argument(
        "--rec_char_dict_path",
        type=str,
        default="./ppocr/utils/ppocr_keys_v1.txt")
    return parser.parse_args()
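
# Usage sketch (comment only, paths hypothetical): a typical detector run with
# these flags could be
#   python predict_det.py --image_dir ./test_imgs --det_model_dir ./inference/det_db
# Boolean flags pass through str2bool above, so "true", "t" and "1" (any case)
# all mean True; anything else means False.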

def get_image_file_list(image_dir):
    image_file_list = []
    if image_dir is None:
        return image_file_list
    if os.path.isfile(image_dir):
        image_file_list = [image_dir]
    elif os.path.isdir(image_dir):
        for single_file in os.listdir(image_dir):
            image_file_list.append(os.path.join(image_dir, single_file))
    return image_file_list


def create_predictor(args, mode):
    if mode == "det":
        model_dir = args.det_model_dir
    else:
        model_dir = args.rec_model_dir

    if model_dir is None:
        logger.info("cannot find {} model file path {}".format(mode, model_dir))
        sys.exit(0)
    model_file_path = model_dir + "/model"
    params_file_path = model_dir + "/params"
    if not os.path.exists(model_file_path):
        logger.info("cannot find model file path {}".format(model_file_path))
        sys.exit(0)
    if not os.path.exists(params_file_path):
        logger.info("cannot find params file path {}".format(params_file_path))
        sys.exit(0)

    config = AnalysisConfig(model_file_path, params_file_path)

    if args.use_gpu:
        config.enable_use_gpu(args.gpu_mem, 0)
    else:
        config.disable_gpu()

    config.disable_glog_info()
    config.switch_ir_optim(args.ir_optim)
    # if args.use_tensorrt:
    #     config.enable_tensorrt_engine(
    #         precision_mode=AnalysisConfig.Precision.Half
    #         if args.use_fp16 else AnalysisConfig.Precision.Float32,
    #         max_batch_size=args.batch_size)

    config.enable_memory_optim()
    # use zero copy
    config.switch_use_feed_fetch_ops(False)
    predictor = create_paddle_predictor(config)
    input_names = predictor.get_input_names()
    input_tensor = predictor.get_input_tensor(input_names[0])
    output_names = predictor.get_output_names()
    output_tensors = []
    for output_name in output_names:
        output_tensor = predictor.get_output_tensor(output_name)
        output_tensors.append(output_tensor)
    return predictor, input_tensor, output_tensors


def draw_text_det_res(dt_boxes, img_path):
    src_im = cv2.imread(img_path)
    for box in dt_boxes:
        box = np.array(box).astype(np.int32).reshape(-1, 2)
        cv2.polylines(src_im, [box], True, color=(255, 255, 0), thickness=2)
    img_name_pure = img_path.split("/")[-1]
    cv2.imwrite("./output/%s" % img_name_pure, src_im)


if __name__ == '__main__':
    args = parse_args()
    args.use_gpu = False
    root_path = "/Users/liuweiwei06/Desktop/TEST_CODES/icode/baidu/personal-code/PaddleOCR/"
    args.det_model_dir = root_path + "test_models/public_v1/ch_det_mv3_db"

    predictor, input_tensor, output_tensors = create_predictor(args, mode='det')
    print(predictor.get_input_names())
    print(predictor.get_output_names())
    print(predictor.program(), file=open("det_program.txt", 'w'))

    args.rec_model_dir = root_path + "test_models/public_v1/ch_rec_mv3_crnn/"
    rec_predictor, input_tensor, output_tensors = create_predictor(
        args, mode='rec')
    print(rec_predictor.get_input_names())
    print(rec_predictor.get_output_names())
@ -0,0 +1,125 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import time
import multiprocessing
import numpy as np


def set_paddle_flags(**kwargs):
    for key, value in kwargs.items():
        if os.environ.get(key, None) is None:
            os.environ[key] = str(value)


# NOTE(paddle-dev): All of these flags should be
# set before `import paddle`. Otherwise, it would
# not take any effect.
set_paddle_flags(
    FLAGS_eager_delete_tensor_gb=0,  # enable GC to save memory
)

from paddle import fluid

# from ppocr.utils.utility import load_config, merge_config
from ppocr.data.reader_main import test_reader
import program
from ppocr.utils.utility import initial_logger
logger = initial_logger()
from ppocr.data.reader_main import reader_main
from ppocr.utils.save_load import init_model
from ppocr.utils.character import CharacterOps
from ppocr.utils.utility import create_module


def main():
    config = program.load_config(FLAGS.config)
    program.merge_config(FLAGS.opt)
    logger.info(config)
    char_ops = CharacterOps(config['Global'])
    config['Global']['char_ops'] = char_ops

    # check if set use_gpu=True in paddlepaddle cpu version
    use_gpu = config['Global']['use_gpu']
    # check_gpu(use_gpu)

    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    rec_model = create_module(config['Architecture']['function'])(params=config)

    startup_prog = fluid.Program()
    eval_prog = fluid.Program()
    with fluid.program_guard(eval_prog, startup_prog):
        with fluid.unique_name.guard():
            _, outputs = rec_model(mode="test")
            fetch_name_list = list(outputs.keys())
            fetch_varname_list = [outputs[v].name for v in fetch_name_list]
    eval_prog = eval_prog.clone(for_test=True)
    exe.run(startup_prog)

    init_model(config, eval_prog, exe)

    blobs = reader_main(config, 'test')
    imgs = next(blobs())
    for img in imgs:
        predict = exe.run(program=eval_prog,
                          feed={"image": img},
                          fetch_list=fetch_varname_list,
                          return_numpy=False)

        preds = np.array(predict[0])
        if preds.shape[1] == 1:
            preds = preds.reshape(-1)
            preds_lod = predict[0].lod()[0]
            preds_text = char_ops.decode(preds)
        else:
            end_pos = np.where(preds[0, :] == 1)[0]
            if len(end_pos) <= 1:
                preds_text = preds[0, 1:]
            else:
                preds_text = preds[0, 1:end_pos[1]]
            preds_text = preds_text.reshape(-1)
            preds_text = char_ops.decode(preds_text)

        print(preds)
        print(preds_text)

    # save for inference model
    target_var = []
    for key, values in outputs.items():
        target_var.append(values)

    fluid.io.save_inference_model(
        "./output/",
        feeded_var_names=['image'],
        target_vars=target_var,
        executor=exe,
        main_program=eval_prog,
        model_filename="model",
        params_filename="params")


if __name__ == '__main__':
    parser = program.ArgsParser()
    FLAGS = parser.parse_args()
    main()
@ -0,0 +1,365 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from argparse import ArgumentParser, RawDescriptionHelpFormatter
import sys
import yaml
import os
from ppocr.utils.utility import create_module
from ppocr.utils.utility import initial_logger
logger = initial_logger()

import paddle.fluid as fluid
import time
from ppocr.utils.stats import TrainingStats
from eval_utils.eval_det_utils import eval_det_run
from eval_utils.eval_rec_utils import eval_rec_run
from ppocr.utils.save_load import save_model
import numpy as np
from ppocr.utils.character import cal_predicts_accuracy


class ArgsParser(ArgumentParser):
    def __init__(self):
        super(ArgsParser, self).__init__(
            formatter_class=RawDescriptionHelpFormatter)
        self.add_argument("-c", "--config", help="configuration file to use")
        self.add_argument(
            "-o", "--opt", nargs='+', help="set configuration options")

    def parse_args(self, argv=None):
        args = super(ArgsParser, self).parse_args(argv)
        assert args.config is not None, \
            "Please specify --config=configure_file_path."
        args.opt = self._parse_opt(args.opt)
        return args

    def _parse_opt(self, opts):
        config = {}
        if not opts:
            return config
        for s in opts:
            s = s.strip()
            k, v = s.split('=')
            config[k] = yaml.load(v, Loader=yaml.Loader)
        return config


class AttrDict(dict):
    """Single level attribute dict, NOT recursive"""

    def __init__(self, **kwargs):
        super(AttrDict, self).__init__()
        super(AttrDict, self).update(kwargs)

    def __getattr__(self, key):
        if key in self:
            return self[key]
        raise AttributeError("object has no attribute '{}'".format(key))


global_config = AttrDict()


def load_config(file_path):
    """
    Load config from yml/yaml file.

    Args:
        file_path (str): Path of the config file to be loaded.

    Returns: global config
    """
    _, ext = os.path.splitext(file_path)
    assert ext in ['.yml', '.yaml'], "only support yaml files for now"
    merge_config(yaml.load(open(file_path), Loader=yaml.Loader))
    assert "reader_yml" in global_config['Global'],\
        "reader_yml is missing in the Global section of the config"
    reader_file_path = global_config['Global']['reader_yml']
    _, ext = os.path.splitext(reader_file_path)
    assert ext in ['.yml', '.yaml'], "only support yaml files for reader"
    merge_config(yaml.load(open(reader_file_path), Loader=yaml.Loader))
    return global_config


def merge_config(config):
    """
    Merge config into global config.

    Args:
        config (dict): Config to be merged.

    Returns: global config
    """
    for key, value in config.items():
        if "." not in key:
            if isinstance(value, dict) and key in global_config:
                global_config[key].update(value)
            else:
                global_config[key] = value
        else:
            sub_keys = key.split('.')
            assert (sub_keys[0] in global_config)
            cur = global_config[sub_keys[0]]
            for idx, sub_key in enumerate(sub_keys[1:]):
                assert (sub_key in cur)
                if idx == len(sub_keys) - 2:
                    cur[sub_key] = value
                else:
                    cur = cur[sub_key]
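
# Comment-only example of the "-o" override flow: ArgsParser._parse_opt turns
#   -o Global.use_gpu=false Global.epoch_num=300
# into {'Global.use_gpu': False, 'Global.epoch_num': 300} (values are parsed
# by yaml.load), and merge_config then walks each dotted key so that
# global_config['Global']['use_gpu'] ends up False.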

def check_gpu(use_gpu):
    """
    Log error and exit when set use_gpu=true in paddlepaddle
    cpu version.
    """
    err = "Config use_gpu cannot be set as true while you are " \
          "using paddlepaddle cpu version ! \nPlease try: \n" \
          "\t1. Install paddlepaddle-gpu to run model on GPU \n" \
          "\t2. Set use_gpu as false in config file to run " \
          "model on CPU"

    try:
        if use_gpu and not fluid.is_compiled_with_cuda():
            logger.error(err)
            sys.exit(1)
    except Exception as e:
        pass


def build(config, main_prog, startup_prog, mode):
    """
    Build a program using a model and an optimizer
    1. create feeds
    2. create a dataloader
    3. create a model
    4. create fetchs
    5. create an optimizer

    Args:
        config(dict): config
        main_prog(): main program
        startup_prog(): startup program
        mode(str): train, eval or test

    Returns:
        dataloader(): a bridge between the model and the data
        fetchs(dict): dict of model outputs(included loss and measures)
    """
    with fluid.program_guard(main_prog, startup_prog):
        with fluid.unique_name.guard():
            func_infor = config['Architecture']['function']
            model = create_module(func_infor)(params=config)
            dataloader, outputs = model(mode=mode)
            fetch_name_list = list(outputs.keys())
            fetch_varname_list = [outputs[v].name for v in fetch_name_list]
            opt_loss_name = None
            if mode == "train":
                opt_loss = outputs['total_loss']
                opt_params = config['Optimizer']
                optimizer = create_module(opt_params['function'])(opt_params)
                optimizer.minimize(opt_loss)
                opt_loss_name = opt_loss.name
                global_lr = optimizer._global_learning_rate()
                global_lr.persistable = True
                fetch_name_list.insert(0, "lr")
                fetch_varname_list.insert(0, global_lr.name)
    return (dataloader, fetch_name_list, fetch_varname_list, opt_loss_name)


def build_export(config, main_prog, startup_prog):
    """
    Build a program for inference-model export:
    1. create feeds
    2. create a model
    3. create fetchs

    Args:
        config(dict): config
        main_prog(): main program
        startup_prog(): startup program

    Returns:
        feeded_var_names(list): names of the input variables
        target_vars(list): output variables to export
        fetches_var_name(list): names of the output variables
    """
    with fluid.program_guard(main_prog, startup_prog):
        with fluid.unique_name.guard():
            func_infor = config['Architecture']['function']
            model = create_module(func_infor)(params=config)
            image, outputs = model(mode='export')
            fetches_var = [outputs[name] for name in outputs]
            fetches_var_name = [name for name in outputs]
            feeded_var_names = [image.name]
            target_vars = fetches_var
    return feeded_var_names, target_vars, fetches_var_name


def create_multi_devices_program(program, loss_var_name):
    build_strategy = fluid.BuildStrategy()
    build_strategy.memory_optimize = False
    build_strategy.enable_inplace = True
    exec_strategy = fluid.ExecutionStrategy()
    exec_strategy.num_iteration_per_drop_scope = 1
    compile_program = fluid.CompiledProgram(program).with_data_parallel(
        loss_name=loss_var_name,
        build_strategy=build_strategy,
        exec_strategy=exec_strategy)
    return compile_program


def train_eval_det_run(config, exe, train_info_dict, eval_info_dict):
    train_batch_id = 0
    log_smooth_window = config['Global']['log_smooth_window']
    epoch_num = config['Global']['epoch_num']
    print_batch_step = config['Global']['print_batch_step']
    eval_batch_step = config['Global']['eval_batch_step']
    save_epoch_step = config['Global']['save_epoch_step']
    save_model_dir = config['Global']['save_model_dir']
    train_stats = TrainingStats(log_smooth_window,
                                train_info_dict['fetch_name_list'])
    best_eval_hmean = -1
    best_batch_id = 0
    best_epoch = 0
    train_loader = train_info_dict['reader']
    for epoch in range(epoch_num):
        train_loader.start()
        try:
            while True:
                t1 = time.time()
                train_outs = exe.run(
                    program=train_info_dict['compile_program'],
                    fetch_list=train_info_dict['fetch_varname_list'],
                    return_numpy=False)
                stats = {}
                for tno in range(len(train_outs)):
                    fetch_name = train_info_dict['fetch_name_list'][tno]
                    fetch_value = np.mean(np.array(train_outs[tno]))
                    stats[fetch_name] = fetch_value
                t2 = time.time()
                train_batch_elapse = t2 - t1
                train_stats.update(stats)
                if train_batch_id > 0 and train_batch_id \
                        % print_batch_step == 0:
                    logs = train_stats.log()
                    strs = 'epoch: {}, iter: {}, {}, time: {:.3f}'.format(
                        epoch, train_batch_id, logs, train_batch_elapse)
                    logger.info(strs)

                if train_batch_id > 0 and\
                        train_batch_id % eval_batch_step == 0:
                    metrics = eval_det_run(exe, config, eval_info_dict, "eval")
                    hmean = metrics['hmean']
                    if hmean >= best_eval_hmean:
                        best_eval_hmean = hmean
                        best_batch_id = train_batch_id
                        best_epoch = epoch
                        save_path = save_model_dir + "/best_accuracy"
                        save_model(train_info_dict['train_program'], save_path)
                    strs = 'Test iter: {}, metrics:{}, best_hmean:{:.6f}, best_epoch:{}, best_batch_id:{}'.format(
                        train_batch_id, metrics, best_eval_hmean, best_epoch,
                        best_batch_id)
                    logger.info(strs)
                train_batch_id += 1

        except fluid.core.EOFException:
            train_loader.reset()

        if epoch > 0 and epoch % save_epoch_step == 0:
            save_path = save_model_dir + "/iter_epoch_%d" % (epoch)
            save_model(train_info_dict['train_program'], save_path)
    return


def train_eval_rec_run(config, exe, train_info_dict, eval_info_dict):
    train_batch_id = 0
    log_smooth_window = config['Global']['log_smooth_window']
    epoch_num = config['Global']['epoch_num']
    print_batch_step = config['Global']['print_batch_step']
    eval_batch_step = config['Global']['eval_batch_step']
    save_epoch_step = config['Global']['save_epoch_step']
    save_model_dir = config['Global']['save_model_dir']
    train_stats = TrainingStats(log_smooth_window, ['loss', 'acc'])
    best_eval_acc = -1
    best_batch_id = 0
    best_epoch = 0
    train_loader = train_info_dict['reader']
    for epoch in range(epoch_num):
        train_loader.start()
        try:
            while True:
                t1 = time.time()
                train_outs = exe.run(
                    program=train_info_dict['compile_program'],
                    fetch_list=train_info_dict['fetch_varname_list'],
                    return_numpy=False)
                fetch_map = dict(
                    zip(train_info_dict['fetch_name_list'],
                        range(len(train_outs))))

                loss = np.mean(np.array(train_outs[fetch_map['total_loss']]))
                lr = np.mean(np.array(train_outs[fetch_map['lr']]))
                preds_idx = fetch_map['decoded_out']
                preds = np.array(train_outs[preds_idx])
                preds_lod = train_outs[preds_idx].lod()[0]
                labels_idx = fetch_map['label']
                labels = np.array(train_outs[labels_idx])
                labels_lod = train_outs[labels_idx].lod()[0]

                acc, acc_num, img_num = cal_predicts_accuracy(
                    config['Global']['char_ops'], preds, preds_lod, labels,
                    labels_lod)
                t2 = time.time()
                train_batch_elapse = t2 - t1
                stats = {'loss': loss, 'acc': acc}
                train_stats.update(stats)
                if train_batch_id > 0 and train_batch_id \
                        % print_batch_step == 0:
                    logs = train_stats.log()
                    strs = 'epoch: {}, iter: {}, lr: {:.6f}, {}, time: {:.3f}'.format(
                        epoch, train_batch_id, lr, logs, train_batch_elapse)
                    logger.info(strs)

                if train_batch_id > 0 and\
                        train_batch_id % eval_batch_step == 0:
                    metrics = eval_rec_run(exe, config, eval_info_dict, "eval")
                    eval_acc = metrics['avg_acc']
                    eval_sample_num = metrics['total_sample_num']
                    if eval_acc > best_eval_acc:
                        best_eval_acc = eval_acc
                        best_batch_id = train_batch_id
                        best_epoch = epoch
                        save_path = save_model_dir + "/best_accuracy"
                        save_model(train_info_dict['train_program'], save_path)
                    strs = 'Test iter: {}, acc:{:.6f}, best_acc:{:.6f}, best_epoch:{}, best_batch_id:{}, eval_sample_num:{}'.format(
                        train_batch_id, eval_acc, best_eval_acc, best_epoch,
                        best_batch_id, eval_sample_num)
                    logger.info(strs)
                train_batch_id += 1

        except fluid.core.EOFException:
            train_loader.reset()

        if epoch > 0 and epoch % save_epoch_step == 0:
            save_path = save_model_dir + "/iter_epoch_%d" % (epoch)
            save_model(train_info_dict['train_program'], save_path)
    return
@ -0,0 +1,134 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import sys
import time
import numpy as np
from copy import deepcopy
import json

# from paddle.fluid.contrib.model_stat import summary


def set_paddle_flags(**kwargs):
    for key, value in kwargs.items():
        if os.environ.get(key, None) is None:
            os.environ[key] = str(value)


# NOTE(paddle-dev): All of these flags should be
# set before `import paddle`. Otherwise, it would
# not take any effect.
set_paddle_flags(
    FLAGS_eager_delete_tensor_gb=0,  # enable GC to save memory
)

from paddle import fluid
from ppocr.utils.utility import create_module
from ppocr.utils.utility import load_config, merge_config
import ppocr.data.det.reader_main as reader
from ppocr.utils.utility import ArgsParser
from ppocr.utils.check import check_gpu
from ppocr.utils.checkpoint import load_pretrain, load_checkpoint, save, save_model

from ppocr.utils.utility import initial_logger
logger = initial_logger()
from ppocr.utils.eval_utils import eval_det_run


def draw_det_res(dt_boxes, config, img_name, ino):
    if len(dt_boxes) > 0:
        img_set_path = config['TestReader']['img_set_dir']
        img_path = img_set_path + img_name
        import cv2
        src_im = cv2.imread(img_path)
        for box in dt_boxes:
            box = box.astype(np.int32).reshape((-1, 1, 2))
            cv2.polylines(src_im, [box], True, color=(255, 255, 0), thickness=2)
        cv2.imwrite("tmp%d.jpg" % ino, src_im)


def main():
    config = load_config(FLAGS.config)
    merge_config(FLAGS.opt)
    print(config)

    # check if set use_gpu=True in paddlepaddle cpu version
    use_gpu = config['Global']['use_gpu']
    check_gpu(use_gpu)

    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    det_model = create_module(config['Architecture']['function'])(params=config)

    startup_prog = fluid.Program()
    eval_prog = fluid.Program()
    with fluid.program_guard(eval_prog, startup_prog):
        with fluid.unique_name.guard():
            eval_loader, eval_outputs = det_model(mode="test")
            eval_fetch_list = [v.name for v in eval_outputs]
    eval_prog = eval_prog.clone(for_test=True)
    exe.run(startup_prog)

    pretrain_weights = config['Global']['pretrain_weights']
    if pretrain_weights is not None:
        load_pretrain(exe, eval_prog, pretrain_weights)
        # fluid.load(eval_prog, pretrain_weights)
        # def if_exist(var):
        #     return os.path.exists(os.path.join(pretrain_weights, var.name))
        # fluid.io.load_vars(exe, pretrain_weights, predicate=if_exist, main_program=eval_prog)
    else:
        logger.info("pretrain_weights not found: %s" % pretrain_weights)
        sys.exit(0)

    # fluid.io.save_inference_model("./output/", feeded_var_names=['image'],
    #     target_vars=eval_outputs, executor=exe, main_program=eval_prog,
    #     model_filename="model", params_filename="params")
    # sys.exit(-1)

    metrics = eval_det_run(exe, eval_prog, eval_fetch_list, config, "test")
    logger.info("metrics:{}".format(metrics))
    logger.info("success!")


def test_reader():
    config = load_config(FLAGS.config)
    merge_config(FLAGS.opt)
    print(config)
    tmp_reader = reader.test_reader(config=config)
    count = 0
    print_count = 0
    import time
    starttime = time.time()
    for data in tmp_reader():
        count += len(data)
        print_count += 1
        if print_count % 10 == 0:
            batch_time = (time.time() - starttime) / print_count
            print("reader:", count, len(data), batch_time)
    print("finish reader:", count)
    print("success")


if __name__ == '__main__':
    parser = ArgsParser()
    FLAGS = parser.parse_args()
    main()
    # test_reader()
@ -0,0 +1,160 @@
|
|||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy as np
|
||||
from copy import deepcopy
|
||||
import json
|
||||
|
||||
# from paddle.fluid.contrib.model_stat import summary
|
||||
|
||||
|
||||
def set_paddle_flags(**kwargs):
|
||||
for key, value in kwargs.items():
|
||||
if os.environ.get(key, None) is None:
|
||||
os.environ[key] = str(value)
|
||||
|
||||
|
||||
# NOTE(paddle-dev): All of these flags should be
|
||||
# set before `import paddle`. Otherwise, it would
|
||||
# not take any effect.
|
||||
set_paddle_flags(
|
||||
FLAGS_eager_delete_tensor_gb=0, # enable GC to save memory
|
||||
)
|
||||
|
||||
from paddle import fluid
|
||||
from ppocr.utils.utility import create_module
|
||||
from ppocr.utils.utility import load_config, merge_config
|
||||
import ppocr.data.det.reader_main as reader
|
||||
from ppocr.utils.utility import ArgsParser
|
||||
from ppocr.utils.check import check_gpu
|
||||
from ppocr.utils.checkpoint import load_pretrain, load_checkpoint, save, save_model
|
||||
|
||||
from ppocr.utils.utility import initial_logger
|
||||
logger = initial_logger()
|
||||
from ppocr.utils.eval_utils import eval_det_run
|
||||
|
||||
|
||||
def draw_det_res(dt_boxes, config, img_name, ino):
|
||||
if len(dt_boxes) > 0:
|
||||
img_set_path = config['TestReader']['img_set_dir']
|
||||
img_path = img_set_path + img_name
|
||||
import cv2
|
||||
src_im = cv2.imread(img_path)
|
||||
for box in dt_boxes:
|
||||
box = box.astype(np.int32).reshape((-1, 1, 2))
|
||||
cv2.polylines(src_im, [box], True, color=(255, 255, 0), thickness=2)
|
||||
cv2.imwrite("tmp%d.jpg" % ino, src_im)
|
||||
|
||||
|
||||
def main():
    config = load_config(FLAGS.config)
    merge_config(FLAGS.opt)
    print(config)

    # check if set use_gpu=True in paddlepaddle cpu version
    use_gpu = config['Global']['use_gpu']
    check_gpu(use_gpu)

    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    det_model = create_module(config['Architecture']['function'])(params=config)

    startup_prog = fluid.Program()
    eval_prog = fluid.Program()
    with fluid.program_guard(eval_prog, startup_prog):
        with fluid.unique_name.guard():
            eval_outputs = det_model(mode="test")
            eval_fetch_list = [v.name for v in eval_outputs]
    eval_prog = eval_prog.clone(for_test=True)
    exe.run(startup_prog)

    pretrain_weights = config['Global']['pretrain_weights']
    if pretrain_weights is not None:
        fluid.load(eval_prog, pretrain_weights)
    else:
        logger.info("pretrain_weights not found: %s" % pretrain_weights)
        sys.exit(0)

    save_res_path = config['Global']['save_res_path']
    with open(save_res_path, "wb") as fout:
        test_reader = reader.test_reader(config=config)
        tackling_num = 0
        for data in test_reader():
            img_num = len(data)
            tackling_num = tackling_num + img_num
            logger.info("tackling_num:%d", tackling_num)
            img_list = []
            ratio_list = []
            img_name_list = []
            for ino in range(img_num):
                img_list.append(data[ino][0])
                ratio_list.append(data[ino][1])
                img_name_list.append(data[ino][2])
            img_list = np.concatenate(img_list, axis=0)
            outs = exe.run(eval_prog,
                           feed={'image': img_list},
                           fetch_list=eval_fetch_list)

            global_params = config['Global']
            postprocess_params = deepcopy(config["PostProcess"])
            postprocess_params.update(global_params)
            postprocess = create_module(postprocess_params['function'])(
                params=postprocess_params)
            dt_boxes_list = postprocess(outs, ratio_list)
            for ino in range(img_num):
                dt_boxes = dt_boxes_list[ino]
                img_name = img_name_list[ino]
                dt_boxes_json = []
                for box in dt_boxes:
                    tmp_json = {"transcription": ""}
                    tmp_json['points'] = box.tolist()
                    dt_boxes_json.append(tmp_json)
                otstr = img_name + "\t" + json.dumps(dt_boxes_json) + "\n"
                fout.write(otstr.encode())
                # draw_det_res(dt_boxes, config, img_name, ino)
    logger.info("success!")

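# Standalone smoke test for the detection test reader: iterates the dataset
# and prints the running sample count and average time per batch.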
def test_reader():
    config = load_config(FLAGS.config)
    merge_config(FLAGS.opt)
    print(config)
    tmp_reader = reader.test_reader(config=config)
    count = 0
    print_count = 0
    import time
    starttime = time.time()
    for data in tmp_reader():
        count += len(data)
        print_count += 1
        if print_count % 10 == 0:
            batch_time = (time.time() - starttime) / print_count
            print("reader:", count, len(data), batch_time)
    print("finish reader:", count)
    print("success")


if __name__ == '__main__':
    parser = ArgsParser()
    FLAGS = parser.parse_args()
    main()
    # test_reader()
@ -0,0 +1,116 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import time
import multiprocessing
import numpy as np

def set_paddle_flags(**kwargs):
    for key, value in kwargs.items():
        if os.environ.get(key, None) is None:
            os.environ[key] = str(value)


# NOTE(paddle-dev): All of these flags should be
# set before `import paddle`. Otherwise, it would
# not take any effect.
set_paddle_flags(
    FLAGS_eager_delete_tensor_gb=0,  # enable GC to save memory
)

from paddle import fluid

from ppocr.utils.utility import load_config, merge_config
from ppocr.data.rec.reader_main import test_reader

from ppocr.utils.utility import ArgsParser
from ppocr.utils.character import CharacterOps, cal_predicts_accuracy
from ppocr.utils.check import check_gpu
from ppocr.utils.utility import create_module

from ppocr.utils.utility import initial_logger
logger = initial_logger()

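# Recognition inference entry point: run a single image through the text
# recognizer, decode the prediction with CharacterOps, and export a
# deployable inference model to ./output/.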
def main():
    config = load_config(FLAGS.config)
    merge_config(FLAGS.opt)
    char_ops = CharacterOps(config['Global'])
    config['Global']['char_num'] = char_ops.get_char_num()

    # check if set use_gpu=True in paddlepaddle cpu version
    use_gpu = config['Global']['use_gpu']
    check_gpu(use_gpu)

    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    rec_model = create_module(config['Architecture']['function'])(params=config)

    startup_prog = fluid.Program()
    eval_prog = fluid.Program()
    with fluid.program_guard(eval_prog, startup_prog):
        with fluid.unique_name.guard():
            eval_outputs = rec_model(mode="test")
            eval_fetch_list = [v.name for v in eval_outputs]
    eval_prog = eval_prog.clone(for_test=True)
    exe.run(startup_prog)

    pretrain_weights = config['Global']['pretrain_weights']
    if pretrain_weights is not None:
        fluid.load(eval_prog, pretrain_weights)

    # test_img_path is read from the top level of the config, so it can be
    # supplied via the yml file or a command-line override
    test_img_path = config['test_img_path']
    image_shape = config['Global']['image_shape']
    blobs = test_reader(image_shape, test_img_path)
    predict = exe.run(program=eval_prog,
                      feed={"image": blobs},
                      fetch_list=eval_fetch_list,
                      return_numpy=False)
    preds = np.array(predict[0])
    if preds.shape[1] == 1:
        # CTC-style head: a variable-length LoDTensor of predicted label ids
        preds = preds.reshape(-1)
        preds_lod = predict[0].lod()[0]
        preds_text = char_ops.decode(preds)
    else:
        # attention-style head: fixed-length sequence where id 1 marks the
        # end token
        end_pos = np.where(preds[0, :] == 1)[0]
        if len(end_pos) <= 1:
            preds_text = preds[0, 1:]
        else:
            preds_text = preds[0, 1:end_pos[1]]
        preds_text = preds_text.reshape(-1)
        preds_text = char_ops.decode(preds_text)

    fluid.io.save_inference_model(
        "./output/",
        feeded_var_names=['image'],
        target_vars=eval_outputs,
        executor=exe,
        main_program=eval_prog,
        model_filename="model",
        params_filename="params")
    print(preds)
    print(preds_text)


if __name__ == '__main__':
    parser = ArgsParser()
    FLAGS = parser.parse_args()
    main()
@ -0,0 +1,128 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import time
import multiprocessing
import numpy as np


def set_paddle_flags(**kwargs):
    for key, value in kwargs.items():
        if os.environ.get(key, None) is None:
            os.environ[key] = str(value)


# NOTE(paddle-dev): All of these flags should be
# set before `import paddle`. Otherwise, it would
# not take any effect.
set_paddle_flags(
    FLAGS_eager_delete_tensor_gb=0,  # enable GC to save memory
)

from paddle import fluid

from ppocr.utils.utility import load_config, merge_config
import ppocr.data.rec.reader_main as reader

from ppocr.utils.utility import ArgsParser
from ppocr.utils.character import CharacterOps, cal_predicts_accuracy
from ppocr.utils.check import check_gpu
from ppocr.utils.utility import create_module

from ppocr.utils.eval_utils import eval_run

from ppocr.utils.utility import initial_logger
logger = initial_logger()

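# Benchmark evaluation entry point: scores the recognition model on the
# standard scene-text test sets (IIIT5k, SVT, IC03/IC13/IC15, SVTP, CUTE80),
# one LMDB directory at a time, then logs per-set and averaged accuracy.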
def main():
    config = load_config(FLAGS.config)
    merge_config(FLAGS.opt)
    char_ops = CharacterOps(config['Global'])
    config['Global']['char_num'] = char_ops.get_char_num()

    # check if set use_gpu=True in paddlepaddle cpu version
    use_gpu = config['Global']['use_gpu']
    check_gpu(use_gpu)

    if use_gpu:
        devices_num = fluid.core.get_cuda_device_count()
    else:
        devices_num = int(
            os.environ.get('CPU_NUM', multiprocessing.cpu_count()))

    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    rec_model = create_module(config['Architecture']['function'])(params=config)

    startup_prog = fluid.Program()
    eval_prog = fluid.Program()
    with fluid.program_guard(eval_prog, startup_prog):
        with fluid.unique_name.guard():
            eval_loader, eval_outputs = rec_model(mode="eval")
            eval_fetch_list = [v.name for v in eval_outputs]
    eval_prog = eval_prog.clone(for_test=True)

    exe.run(startup_prog)
    pretrain_weights = config['Global']['pretrain_weights']
    if pretrain_weights is not None:
        fluid.load(eval_prog, pretrain_weights)

    eval_data_list = ['IIIT5k_3000', 'SVT', 'IC03_860', 'IC03_867',
                      'IC13_857', 'IC13_1015', 'IC15_1811', 'IC15_2077',
                      'SVTP', 'CUTE80']
    eval_data_dir = config['TestReader']['lmdb_sets_dir']
    total_forward_time = 0
    total_evaluation_data_number = 0
    total_correct_number = 0
    eval_data_acc_info = {}
    for eval_data in eval_data_list:
        config['TestReader']['lmdb_sets_dir'] = \
            eval_data_dir + "/" + eval_data
        eval_reader = reader.train_eval_reader(
            config=config, char_ops=char_ops, mode="test")
        eval_loader.set_sample_list_generator(eval_reader, places=place)

        start_time = time.time()
        outs = eval_run(exe, eval_prog, eval_loader, eval_fetch_list, char_ops,
                        "best", "test")
        infer_time = time.time() - start_time
        eval_acc, acc_num, sample_num = outs
        total_forward_time += infer_time
        total_evaluation_data_number += sample_num
        total_correct_number += acc_num
        eval_data_acc_info[eval_data] = outs

    avg_forward_time = total_forward_time / total_evaluation_data_number
    avg_acc = total_correct_number * 1.0 / total_evaluation_data_number
    logger.info('-' * 50)
    strs = ""
    for eval_data in eval_data_list:
        eval_acc, acc_num, sample_num = eval_data_acc_info[eval_data]
        strs += "\n {}, accuracy:{:.6f}".format(eval_data, eval_acc)
    strs += "\n average, accuracy:{:.6f}, time:{:.6f}".format(avg_acc,
                                                              avg_forward_time)
    logger.info(strs)
    logger.info('-' * 50)


if __name__ == '__main__':
    parser = ArgsParser()
    FLAGS = parser.parse_args()
    main()
@ -0,0 +1,216 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import sys
import time
import multiprocessing
import numpy as np

# from paddle.fluid.contrib.model_stat import summary


def set_paddle_flags(**kwargs):
    for key, value in kwargs.items():
        if os.environ.get(key, None) is None:
            os.environ[key] = str(value)


# NOTE(paddle-dev): All of these flags should be
# set before `import paddle`. Otherwise, it would
# not take any effect.
set_paddle_flags(
    FLAGS_eager_delete_tensor_gb=0,  # enable GC to save memory
)

from paddle import fluid
from ppocr.utils.utility import create_module
from ppocr.utils.utility import load_config, merge_config
import ppocr.data.det.reader_main as reader
from ppocr.utils.utility import ArgsParser
from ppocr.utils.character import CharacterOps, cal_predicts_accuracy
from ppocr.utils.check import check_gpu
from ppocr.utils.stats import TrainingStats
from ppocr.utils.checkpoint import load_pretrain, load_checkpoint, save, save_model
from ppocr.utils.eval_utils import eval_run
from ppocr.utils.eval_utils import eval_det_run

from ppocr.utils.utility import initial_logger
logger = initial_logger()
from ppocr.utils.utility import create_multi_devices_program

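# Detection training entry point: builds train/eval programs for EAST or DB,
# optionally loads pretrained weights, then runs the epoch loop with smoothed
# loss logging, periodic hmean evaluation, and best/periodic checkpointing.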
def main():
    config = load_config(FLAGS.config)
    merge_config(FLAGS.opt)
    print(config)

    alg = config['Global']['algorithm']
    assert alg in ['EAST', 'DB']

    # check if set use_gpu=True in paddlepaddle cpu version
    use_gpu = config['Global']['use_gpu']
    check_gpu(use_gpu)

    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    det_model = create_module(config['Architecture']['function'])(params=config)

    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    with fluid.program_guard(train_prog, startup_prog):
        with fluid.unique_name.guard():
            train_loader, train_outputs = det_model(mode="train")
            train_fetch_list = [v.name for v in train_outputs]
            train_loss = train_outputs[0]
            opt_params = config['Optimizer']
            optimizer = create_module(opt_params['function'])(opt_params)
            optimizer.minimize(train_loss)
            global_lr = optimizer._global_learning_rate()
            global_lr.persistable = True
            train_fetch_list.append(global_lr.name)

    eval_prog = fluid.Program()
    with fluid.program_guard(eval_prog, startup_prog):
        with fluid.unique_name.guard():
            eval_loader, eval_outputs = det_model(mode="eval")
            eval_fetch_list = [v.name for v in eval_outputs]
    eval_prog = eval_prog.clone(for_test=True)

    train_reader = reader.train_reader(config=config)
    train_loader.set_sample_list_generator(train_reader, places=place)

    exe.run(startup_prog)

    # compile program for multi-devices
    train_compile_program = create_multi_devices_program(train_prog,
                                                         train_loss.name)

    pretrain_weights = config['Global']['pretrain_weights']
    if pretrain_weights is not None:
        load_pretrain(exe, train_prog, pretrain_weights)
        print("pretrain weights loaded!")

    train_batch_id = 0
    if alg == 'EAST':
        train_log_keys = ['loss_total', 'loss_cls', 'loss_offset']
    elif alg == 'DB':
        train_log_keys = [
            'loss_total', 'loss_shrink', 'loss_threshold', 'loss_binary'
        ]
    log_smooth_window = config['Global']['log_smooth_window']
    epoch_num = config['Global']['epoch_num']
    print_step = config['Global']['print_step']
    eval_step = config['Global']['eval_step']
    save_epoch_step = config['Global']['save_epoch_step']
    save_dir = config['Global']['save_dir']
    train_stats = TrainingStats(log_smooth_window, train_log_keys)
    best_eval_hmean = -1
    best_batch_id = 0
    best_epoch = 0
    for epoch in range(epoch_num):
        train_loader.start()
        try:
            while True:
                t1 = time.time()
                train_outs = exe.run(program=train_compile_program,
                                     fetch_list=train_fetch_list,
                                     return_numpy=False)
                loss_total = np.mean(np.array(train_outs[0]))
                if alg == 'EAST':
                    loss_cls = np.mean(np.array(train_outs[1]))
                    loss_offset = np.mean(np.array(train_outs[2]))
                    stats = {'loss_total': loss_total,
                             'loss_cls': loss_cls,
                             'loss_offset': loss_offset}
                elif alg == 'DB':
                    loss_shrink_maps = np.mean(np.array(train_outs[1]))
                    loss_threshold_maps = np.mean(np.array(train_outs[2]))
                    loss_binary_maps = np.mean(np.array(train_outs[3]))
                    stats = {'loss_total': loss_total,
                             'loss_shrink': loss_shrink_maps,
                             'loss_threshold': loss_threshold_maps,
                             'loss_binary': loss_binary_maps}
                lr = np.mean(np.array(train_outs[-1]))
                t2 = time.time()
                train_batch_elapse = t2 - t1

                train_stats.update(stats)
                if train_batch_id > 0 and train_batch_id % print_step == 0:
                    logs = train_stats.log()
                    strs = 'epoch: {}, iter: {}, lr: {:.6f}, {}, time: {:.3f}'.format(
                        epoch, train_batch_id, lr, logs, train_batch_elapse)
                    logger.info(strs)

                if train_batch_id > 0 and \
                        train_batch_id % eval_step == 0:
                    metrics = eval_det_run(exe, eval_prog, eval_fetch_list,
                                           config, "eval")
                    hmean = metrics['hmean']
                    if hmean >= best_eval_hmean:
                        best_eval_hmean = hmean
                        best_batch_id = train_batch_id
                        best_epoch = epoch
                        save_path = save_dir + "/best_accuracy"
                        save_model(train_prog, save_path)
                    strs = 'Test iter: {}, metrics:{}, best_hmean:{:.6f}, best_epoch:{}, best_batch_id:{}'.format(
                        train_batch_id, metrics, best_eval_hmean, best_epoch,
                        best_batch_id)
                    logger.info(strs)
                train_batch_id += 1

        except fluid.core.EOFException:
            train_loader.reset()

        if epoch > 0 and epoch % save_epoch_step == 0:
            save_path = save_dir + "/iter_epoch_%d" % (epoch)
            save_model(train_prog, save_path)

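# Reader throughput test: loops over the training reader indefinitely and
# prints per-batch timing, which is useful for profiling the data pipeline
# in isolation.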
def test_reader():
    config = load_config(FLAGS.config)
    merge_config(FLAGS.opt)
    print(config)
    tmp_reader = reader.train_reader(config=config)
    count = 0
    print_count = 0
    import time
    while True:
        starttime = time.time()
        count = 0
        for data in tmp_reader():
            count += 1
            print_count += 1
            batch_time = time.time() - starttime
            starttime = time.time()
            print("reader:", count, len(data), batch_time)
        print("finish reader:", count)
    print("success")


if __name__ == '__main__':
    parser = ArgsParser()
    parser.add_argument(
        "-r",
        "--resume_checkpoint",
        default=None,
        type=str,
        help="Checkpoint path for resuming training.")
    FLAGS = parser.parse_args()
    main()
    # test_reader()
@ -0,0 +1,222 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import sys
import time
import multiprocessing
import numpy as np

# from paddle.fluid.contrib.model_stat import summary


def set_paddle_flags(**kwargs):
    for key, value in kwargs.items():
        if os.environ.get(key, None) is None:
            os.environ[key] = str(value)


# NOTE(paddle-dev): All of these flags should be
# set before `import paddle`. Otherwise, it would
# not take any effect.
set_paddle_flags(
    FLAGS_eager_delete_tensor_gb=0,  # enable GC to save memory
)

from paddle import fluid
from ppocr.utils.utility import create_module
from ppocr.utils.utility import load_config, merge_config
import ppocr.data.rec.reader_main as reader
from ppocr.utils.utility import ArgsParser
from ppocr.utils.character import CharacterOps, cal_predicts_accuracy
from ppocr.utils.check import check_gpu
from ppocr.utils.stats import TrainingStats
from ppocr.utils.checkpoint import load_pretrain, load_checkpoint, save, save_model
from ppocr.utils.eval_utils import eval_run

from ppocr.utils.utility import initial_logger
logger = initial_logger()
from ppocr.utils.utility import create_multi_devices_program

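# Recognition training entry point: mirrors the detection trainer, but adds
# optional global-norm gradient clipping and computes train/eval accuracy
# from the predicted and ground-truth label sequences.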
def main():
    config = load_config(FLAGS.config)
    merge_config(FLAGS.opt)
    char_ops = CharacterOps(config['Global'])
    config['Global']['char_num'] = char_ops.get_char_num()
    print(config)

    # check if set use_gpu=True in paddlepaddle cpu version
    use_gpu = config['Global']['use_gpu']
    check_gpu(use_gpu)

    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    rec_model = create_module(config['Architecture']['function'])(params=config)

    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    with fluid.program_guard(train_prog, startup_prog):
        with fluid.unique_name.guard():
            train_loader, train_outputs = rec_model(mode="train")
            save_var = train_outputs[1]

            if "gradient_clip" in config['Global']:
                gradient_clip = config['Global']['gradient_clip']
                clip = fluid.clip.GradientClipByGlobalNorm(gradient_clip)
                fluid.clip.set_gradient_clip(clip, program=train_prog)

            train_fetch_list = [v.name for v in train_outputs]
            train_loss = train_outputs[0]
            opt_params = config['Optimizer']
            optimizer = create_module(opt_params['function'])(opt_params)
            optimizer.minimize(train_loss)
            global_lr = optimizer._global_learning_rate()
            global_lr.persistable = True
            train_fetch_list.append(global_lr.name)

    train_reader = reader.train_eval_reader(
        config=config, char_ops=char_ops, mode="train")
    train_loader.set_sample_list_generator(train_reader, places=place)

    eval_prog = fluid.Program()
    with fluid.program_guard(eval_prog, startup_prog):
        with fluid.unique_name.guard():
            eval_loader, eval_outputs = rec_model(mode="eval")
            eval_fetch_list = [v.name for v in eval_outputs]

    eval_prog = eval_prog.clone(for_test=True)
    exe.run(startup_prog)

    eval_reader = reader.train_eval_reader(
        config=config, char_ops=char_ops, mode="eval")
    eval_loader.set_sample_list_generator(eval_reader, places=place)

    # compile program for multi-devices
    train_compile_program = create_multi_devices_program(train_prog,
                                                         train_loss.name)

    pretrain_weights = config['Global']['pretrain_weights']
    if pretrain_weights is not None:
        load_pretrain(exe, train_prog, pretrain_weights)

    train_batch_id = 0
    train_log_keys = ['loss', 'acc']
    log_smooth_window = config['Global']['log_smooth_window']
    epoch_num = config['Global']['epoch_num']
    loss_type = config['Global']['loss_type']
    print_step = config['Global']['print_step']
    eval_step = config['Global']['eval_step']
    save_epoch_step = config['Global']['save_epoch_step']
    save_dir = config['Global']['save_dir']
    train_stats = TrainingStats(log_smooth_window, train_log_keys)
    best_eval_acc = -1
    best_batch_id = 0
    best_epoch = 0
    for epoch in range(epoch_num):
        train_loader.start()
        try:
            while True:
                t1 = time.time()
                train_outs = exe.run(program=train_compile_program,
                                     fetch_list=train_fetch_list,
                                     return_numpy=False)
                loss = np.mean(np.array(train_outs[0]))
                lr = np.mean(np.array(train_outs[-1]))

                preds = np.array(train_outs[1])
                preds_lod = train_outs[1].lod()[0]
                labels = np.array(train_outs[2])
                labels_lod = train_outs[2].lod()[0]

                acc, acc_num, img_num = cal_predicts_accuracy(
                    char_ops, preds, preds_lod, labels, labels_lod)

                t2 = time.time()
                train_batch_elapse = t2 - t1

                stats = {'loss': loss, 'acc': acc}
                train_stats.update(stats)
                if train_batch_id > 0 and train_batch_id % print_step == 0:
                    logs = train_stats.log()
                    strs = 'epoch: {}, iter: {}, lr: {:.6f}, {}, time: {:.3f}'.format(
                        epoch, train_batch_id, lr, logs, train_batch_elapse)
                    logger.info(strs)

                if train_batch_id > 0 and train_batch_id % eval_step == 0:
                    outs = eval_run(exe, eval_prog, eval_loader,
                                    eval_fetch_list, char_ops, train_batch_id,
                                    "eval")
                    eval_acc, acc_num, sample_num = outs
                    if eval_acc > best_eval_acc:
                        best_eval_acc = eval_acc
                        best_batch_id = train_batch_id
                        best_epoch = epoch
                        save_path = save_dir + "/best_accuracy"
                        save_model(train_prog, save_path)

                    strs = 'Test iter: {}, acc:{:.6f}, best_acc:{:.6f}, best_epoch:{}, best_batch_id:{}, sample_num:{}'.format(
                        train_batch_id, eval_acc, best_eval_acc, best_epoch,
                        best_batch_id, sample_num)
                    logger.info(strs)
                train_batch_id += 1

        except fluid.core.EOFException:
            train_loader.reset()

        if epoch > 0 and epoch % save_epoch_step == 0:
            save_path = save_dir + "/iter_epoch_%d" % (epoch)
            save_model(train_prog, save_path)

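# Smoke test for the recognition reader: iterates the eval reader and prints
# the running sample count and average batch time.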
def test_reader():
    config = load_config(FLAGS.config)
    merge_config(FLAGS.opt)
    char_ops = CharacterOps(config['Global'])
    config['Global']['char_num'] = char_ops.get_char_num()
    print(config)
    # tmp_reader = reader.train_eval_reader(
    #     config=config, char_ops=char_ops, mode="train")
    tmp_reader = reader.train_eval_reader(
        config=config, char_ops=char_ops, mode="eval")
    count = 0
    print_count = 0
    import time
    starttime = time.time()
    for data in tmp_reader():
        count += len(data)
        print_count += 1
        if print_count % 10 == 0:
            batch_time = (time.time() - starttime) / print_count
            print("reader:", count, len(data), batch_time)
    print("finish reader:", count)
    print("success")


if __name__ == '__main__':
    parser = ArgsParser()
    parser.add_argument(
        "-r",
        "--resume_checkpoint",
        default=None,
        type=str,
        help="Checkpoint path for resuming training.")
    FLAGS = parser.parse_args()
    main()
    # test_reader()
@ -0,0 +1,113 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import sys
import time
import multiprocessing
import numpy as np


def set_paddle_flags(**kwargs):
    for key, value in kwargs.items():
        if os.environ.get(key, None) is None:
            os.environ[key] = str(value)


# NOTE(paddle-dev): All of these flags should be
# set before `import paddle`. Otherwise, it would
# not take any effect.
set_paddle_flags(
    FLAGS_eager_delete_tensor_gb=0,  # enable GC to save memory
)

import program
from paddle import fluid
from ppocr.utils.utility import initial_logger
logger = initial_logger()
from ppocr.data.reader_main import reader_main
from ppocr.utils.save_load import init_model
from ppocr.utils.character import CharacterOps

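# Unified training entry point: program.build() assembles the model selected
# by Global.algorithm, and training is dispatched to the detection or
# recognition train-eval loop accordingly. A typical launch (assuming the
# usual ArgsParser flags) would be:
#     python <this_script> -c <config.yml> -o Global.use_gpu=true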
def main():
    config = program.load_config(FLAGS.config)
    program.merge_config(FLAGS.opt)
    logger.info(config)

    # check if set use_gpu=True in paddlepaddle cpu version
    use_gpu = config['Global']['use_gpu']
    program.check_gpu(use_gpu)

    alg = config['Global']['algorithm']
    assert alg in ['EAST', 'DB', 'Rosetta', 'CRNN', 'STARNet', 'RARE']
    if alg in ['Rosetta', 'CRNN', 'STARNet', 'RARE']:
        config['Global']['char_ops'] = CharacterOps(config['Global'])

    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    startup_program = fluid.Program()
    train_program = fluid.Program()
    train_build_outputs = program.build(
        config, train_program, startup_program, mode='train')
    train_loader = train_build_outputs[0]
    train_fetch_name_list = train_build_outputs[1]
    train_fetch_varname_list = train_build_outputs[2]
    train_opt_loss_name = train_build_outputs[3]

    eval_program = fluid.Program()
    eval_build_outputs = program.build(
        config, eval_program, startup_program, mode='eval')
    eval_fetch_name_list = eval_build_outputs[1]
    eval_fetch_varname_list = eval_build_outputs[2]
    eval_program = eval_program.clone(for_test=True)

    train_reader = reader_main(config=config, mode="train")
    train_loader.set_sample_list_generator(train_reader, places=place)

    eval_reader = reader_main(config=config, mode="eval")

    exe = fluid.Executor(place)
    exe.run(startup_program)

    # compile program for multi-devices
    train_compile_program = program.create_multi_devices_program(
        train_program, train_opt_loss_name)
    init_model(config, train_program, exe)

    train_info_dict = {'compile_program': train_compile_program,
                       'train_program': train_program,
                       'reader': train_loader,
                       'fetch_name_list': train_fetch_name_list,
                       'fetch_varname_list': train_fetch_varname_list}

    eval_info_dict = {'program': eval_program,
                      'reader': eval_reader,
                      'fetch_name_list': eval_fetch_name_list,
                      'fetch_varname_list': eval_fetch_varname_list}

    if alg in ['EAST', 'DB']:
        program.train_eval_det_run(config, exe, train_info_dict, eval_info_dict)
    else:
        program.train_eval_rec_run(config, exe, train_info_dict, eval_info_dict)


if __name__ == '__main__':
    parser = program.ArgsParser()
    FLAGS = parser.parse_args()
    main()