From 911c604dd6f8ae7d46d556a99690ce9dccdef348 Mon Sep 17 00:00:00 2001 From: tink2123 Date: Wed, 28 Oct 2020 14:25:30 +0800 Subject: [PATCH] add anno for head --- ppocr/modeling/heads/cls_head.py | 8 + ppocr/modeling/heads/det_east_head.py | 6 + ppocr/modeling/heads/det_sast_head.py | 344 +++++++++++++++++++++----- 3 files changed, 291 insertions(+), 67 deletions(-) diff --git a/ppocr/modeling/heads/cls_head.py b/ppocr/modeling/heads/cls_head.py index 4567adcb..729561f9 100644 --- a/ppocr/modeling/heads/cls_head.py +++ b/ppocr/modeling/heads/cls_head.py @@ -23,6 +23,14 @@ import paddle.fluid as fluid class ClsHead(object): + """ + Class orientation + + Args: + + params(dict): super parameters for build Class network + """ + def __init__(self, params): super(ClsHead, self).__init__() self.class_dim = params['class_dim'] diff --git a/ppocr/modeling/heads/det_east_head.py b/ppocr/modeling/heads/det_east_head.py index de6ed51d..e2d9176e 100755 --- a/ppocr/modeling/heads/det_east_head.py +++ b/ppocr/modeling/heads/det_east_head.py @@ -109,6 +109,12 @@ class EASTHead(object): return f_score, f_geo def __call__(self, inputs): + """ + Fuse different levels of feature map from backbone and predict results + Args: + inputs(list): feature maps from backbone + Return: predicts + """ f_common = self.unet_fusion(inputs) f_score, f_geo = self.detector_header(f_common) predicts = OrderedDict() diff --git a/ppocr/modeling/heads/det_sast_head.py b/ppocr/modeling/heads/det_sast_head.py index 0097913d..7b9520e5 100644 --- a/ppocr/modeling/heads/det_sast_head.py +++ b/ppocr/modeling/heads/det_sast_head.py @@ -38,35 +38,66 @@ class SASTHead(object): blocks{}: contain block_2, block_3, block_4, block_5, block_6, block_7 with 1/4, 1/8, 1/16, 1/32, 1/64, 1/128 resolution. """ - f = [blocks['block_6'], blocks['block_5'], blocks['block_4'], blocks['block_3'], blocks['block_2']] + f = [ + blocks['block_6'], blocks['block_5'], blocks['block_4'], + blocks['block_3'], blocks['block_2'] + ] num_outputs = [256, 256, 192, 192, 128] g = [None, None, None, None, None] - h = [None, None, None, None, None] + h = [None, None, None, None, None] for i in range(5): - h[i] = conv_bn_layer(input=f[i], num_filters=num_outputs[i], - filter_size=1, stride=1, act=None, name='fpn_up_h'+str(i)) + h[i] = conv_bn_layer( + input=f[i], + num_filters=num_outputs[i], + filter_size=1, + stride=1, + act=None, + name='fpn_up_h' + str(i)) for i in range(4): if i == 0: - g[i] = deconv_bn_layer(input=h[i], num_filters=num_outputs[i + 1], act=None, name='fpn_up_g0') + g[i] = deconv_bn_layer( + input=h[i], + num_filters=num_outputs[i + 1], + act=None, + name='fpn_up_g0') #print("g[{}] shape: {}".format(i, g[i].shape)) else: g[i] = fluid.layers.elementwise_add(x=g[i - 1], y=h[i]) g[i] = fluid.layers.relu(g[i]) #g[i] = conv_bn_layer(input=g[i], num_filters=num_outputs[i], # filter_size=1, stride=1, act='relu') - g[i] = conv_bn_layer(input=g[i], num_filters=num_outputs[i], - filter_size=3, stride=1, act='relu', name='fpn_up_g%d_1'%i) - g[i] = deconv_bn_layer(input=g[i], num_filters=num_outputs[i + 1], act=None, name='fpn_up_g%d_2'%i) + g[i] = conv_bn_layer( + input=g[i], + num_filters=num_outputs[i], + filter_size=3, + stride=1, + act='relu', + name='fpn_up_g%d_1' % i) + g[i] = deconv_bn_layer( + input=g[i], + num_filters=num_outputs[i + 1], + act=None, + name='fpn_up_g%d_2' % i) #print("g[{}] shape: {}".format(i, g[i].shape)) g[4] = fluid.layers.elementwise_add(x=g[3], y=h[4]) g[4] = fluid.layers.relu(g[4]) - g[4] = conv_bn_layer(input=g[4], num_filters=num_outputs[4], - filter_size=3, stride=1, act='relu', name='fpn_up_fusion_1') - g[4] = conv_bn_layer(input=g[4], num_filters=num_outputs[4], - filter_size=1, stride=1, act=None, name='fpn_up_fusion_2') - + g[4] = conv_bn_layer( + input=g[4], + num_filters=num_outputs[4], + filter_size=3, + stride=1, + act='relu', + name='fpn_up_fusion_1') + g[4] = conv_bn_layer( + input=g[4], + num_filters=num_outputs[4], + filter_size=1, + stride=1, + act=None, + name='fpn_up_fusion_2') + return g[4] def FPN_Down_Fusion(self, blocks): @@ -77,95 +108,245 @@ class SASTHead(object): f = [blocks['block_0'], blocks['block_1'], blocks['block_2']] num_outputs = [32, 64, 128] g = [None, None, None] - h = [None, None, None] + h = [None, None, None] for i in range(3): - h[i] = conv_bn_layer(input=f[i], num_filters=num_outputs[i], - filter_size=3, stride=1, act=None, name='fpn_down_h'+str(i)) + h[i] = conv_bn_layer( + input=f[i], + num_filters=num_outputs[i], + filter_size=3, + stride=1, + act=None, + name='fpn_down_h' + str(i)) for i in range(2): if i == 0: - g[i] = conv_bn_layer(input=h[i], num_filters=num_outputs[i+1], filter_size=3, stride=2, act=None, name='fpn_down_g0') + g[i] = conv_bn_layer( + input=h[i], + num_filters=num_outputs[i + 1], + filter_size=3, + stride=2, + act=None, + name='fpn_down_g0') else: g[i] = fluid.layers.elementwise_add(x=g[i - 1], y=h[i]) g[i] = fluid.layers.relu(g[i]) - g[i] = conv_bn_layer(input=g[i], num_filters=num_outputs[i], filter_size=3, stride=1, act='relu', name='fpn_down_g%d_1'%i) - g[i] = conv_bn_layer(input=g[i], num_filters=num_outputs[i+1], filter_size=3, stride=2, act=None, name='fpn_down_g%d_2'%i) + g[i] = conv_bn_layer( + input=g[i], + num_filters=num_outputs[i], + filter_size=3, + stride=1, + act='relu', + name='fpn_down_g%d_1' % i) + g[i] = conv_bn_layer( + input=g[i], + num_filters=num_outputs[i + 1], + filter_size=3, + stride=2, + act=None, + name='fpn_down_g%d_2' % i) # print("g[{}] shape: {}".format(i, g[i].shape)) g[2] = fluid.layers.elementwise_add(x=g[1], y=h[2]) g[2] = fluid.layers.relu(g[2]) - g[2] = conv_bn_layer(input=g[2], num_filters=num_outputs[2], - filter_size=3, stride=1, act='relu', name='fpn_down_fusion_1') - g[2] = conv_bn_layer(input=g[2], num_filters=num_outputs[2], - filter_size=1, stride=1, act=None, name='fpn_down_fusion_2') + g[2] = conv_bn_layer( + input=g[2], + num_filters=num_outputs[2], + filter_size=3, + stride=1, + act='relu', + name='fpn_down_fusion_1') + g[2] = conv_bn_layer( + input=g[2], + num_filters=num_outputs[2], + filter_size=1, + stride=1, + act=None, + name='fpn_down_fusion_2') return g[2] def SAST_Header1(self, f_common): """Detector header.""" #f_score - f_score = conv_bn_layer(input=f_common, num_filters=64, filter_size=1, stride=1, act='relu', name='f_score1') - f_score = conv_bn_layer(input=f_score, num_filters=64, filter_size=3, stride=1, act='relu', name='f_score2') - f_score = conv_bn_layer(input=f_score, num_filters=128, filter_size=1, stride=1, act='relu', name='f_score3') - f_score = conv_bn_layer(input=f_score, num_filters=1, filter_size=3, stride=1, name='f_score4') + f_score = conv_bn_layer( + input=f_common, + num_filters=64, + filter_size=1, + stride=1, + act='relu', + name='f_score1') + f_score = conv_bn_layer( + input=f_score, + num_filters=64, + filter_size=3, + stride=1, + act='relu', + name='f_score2') + f_score = conv_bn_layer( + input=f_score, + num_filters=128, + filter_size=1, + stride=1, + act='relu', + name='f_score3') + f_score = conv_bn_layer( + input=f_score, + num_filters=1, + filter_size=3, + stride=1, + name='f_score4') f_score = fluid.layers.sigmoid(f_score) # print("f_score shape: {}".format(f_score.shape)) #f_boder - f_border = conv_bn_layer(input=f_common, num_filters=64, filter_size=1, stride=1, act='relu', name='f_border1') - f_border = conv_bn_layer(input=f_border, num_filters=64, filter_size=3, stride=1, act='relu', name='f_border2') - f_border = conv_bn_layer(input=f_border, num_filters=128, filter_size=1, stride=1, act='relu', name='f_border3') - f_border = conv_bn_layer(input=f_border, num_filters=4, filter_size=3, stride=1, name='f_border4') + f_border = conv_bn_layer( + input=f_common, + num_filters=64, + filter_size=1, + stride=1, + act='relu', + name='f_border1') + f_border = conv_bn_layer( + input=f_border, + num_filters=64, + filter_size=3, + stride=1, + act='relu', + name='f_border2') + f_border = conv_bn_layer( + input=f_border, + num_filters=128, + filter_size=1, + stride=1, + act='relu', + name='f_border3') + f_border = conv_bn_layer( + input=f_border, + num_filters=4, + filter_size=3, + stride=1, + name='f_border4') # print("f_border shape: {}".format(f_border.shape)) - + return f_score, f_border def SAST_Header2(self, f_common): - """Detector header.""" + """Detector header.""" #f_tvo - f_tvo = conv_bn_layer(input=f_common, num_filters=64, filter_size=1, stride=1, act='relu', name='f_tvo1') - f_tvo = conv_bn_layer(input=f_tvo, num_filters=64, filter_size=3, stride=1, act='relu', name='f_tvo2') - f_tvo = conv_bn_layer(input=f_tvo, num_filters=128, filter_size=1, stride=1, act='relu', name='f_tvo3') - f_tvo = conv_bn_layer(input=f_tvo, num_filters=8, filter_size=3, stride=1, name='f_tvo4') + f_tvo = conv_bn_layer( + input=f_common, + num_filters=64, + filter_size=1, + stride=1, + act='relu', + name='f_tvo1') + f_tvo = conv_bn_layer( + input=f_tvo, + num_filters=64, + filter_size=3, + stride=1, + act='relu', + name='f_tvo2') + f_tvo = conv_bn_layer( + input=f_tvo, + num_filters=128, + filter_size=1, + stride=1, + act='relu', + name='f_tvo3') + f_tvo = conv_bn_layer( + input=f_tvo, num_filters=8, filter_size=3, stride=1, name='f_tvo4') # print("f_tvo shape: {}".format(f_tvo.shape)) #f_tco - f_tco = conv_bn_layer(input=f_common, num_filters=64, filter_size=1, stride=1, act='relu', name='f_tco1') - f_tco = conv_bn_layer(input=f_tco, num_filters=64, filter_size=3, stride=1, act='relu', name='f_tco2') - f_tco = conv_bn_layer(input=f_tco, num_filters=128, filter_size=1, stride=1, act='relu', name='f_tco3') - f_tco = conv_bn_layer(input=f_tco, num_filters=2, filter_size=3, stride=1, name='f_tco4') + f_tco = conv_bn_layer( + input=f_common, + num_filters=64, + filter_size=1, + stride=1, + act='relu', + name='f_tco1') + f_tco = conv_bn_layer( + input=f_tco, + num_filters=64, + filter_size=3, + stride=1, + act='relu', + name='f_tco2') + f_tco = conv_bn_layer( + input=f_tco, + num_filters=128, + filter_size=1, + stride=1, + act='relu', + name='f_tco3') + f_tco = conv_bn_layer( + input=f_tco, num_filters=2, filter_size=3, stride=1, name='f_tco4') # print("f_tco shape: {}".format(f_tco.shape)) - + return f_tvo, f_tco def cross_attention(self, f_common): """ """ f_shape = fluid.layers.shape(f_common) - f_theta = conv_bn_layer(input=f_common, num_filters=128, filter_size=1, stride=1, act='relu', name='f_theta') - f_phi = conv_bn_layer(input=f_common, num_filters=128, filter_size=1, stride=1, act='relu', name='f_phi') - f_g = conv_bn_layer(input=f_common, num_filters=128, filter_size=1, stride=1, act='relu', name='f_g') + f_theta = conv_bn_layer( + input=f_common, + num_filters=128, + filter_size=1, + stride=1, + act='relu', + name='f_theta') + f_phi = conv_bn_layer( + input=f_common, + num_filters=128, + filter_size=1, + stride=1, + act='relu', + name='f_phi') + f_g = conv_bn_layer( + input=f_common, + num_filters=128, + filter_size=1, + stride=1, + act='relu', + name='f_g') ### horizon fh_theta = f_theta fh_phi = f_phi fh_g = f_g #flatten fh_theta = fluid.layers.transpose(fh_theta, [0, 2, 3, 1]) - fh_theta = fluid.layers.reshape(fh_theta, [f_shape[0] * f_shape[2], f_shape[3], 128]) + fh_theta = fluid.layers.reshape( + fh_theta, [f_shape[0] * f_shape[2], f_shape[3], 128]) fh_phi = fluid.layers.transpose(fh_phi, [0, 2, 3, 1]) - fh_phi = fluid.layers.reshape(fh_phi, [f_shape[0] * f_shape[2], f_shape[3], 128]) + fh_phi = fluid.layers.reshape( + fh_phi, [f_shape[0] * f_shape[2], f_shape[3], 128]) fh_g = fluid.layers.transpose(fh_g, [0, 2, 3, 1]) - fh_g = fluid.layers.reshape(fh_g, [f_shape[0] * f_shape[2], f_shape[3], 128]) + fh_g = fluid.layers.reshape(fh_g, + [f_shape[0] * f_shape[2], f_shape[3], 128]) #correlation - fh_attn = fluid.layers.matmul(fh_theta, fluid.layers.transpose(fh_phi, [0, 2, 1])) + fh_attn = fluid.layers.matmul(fh_theta, + fluid.layers.transpose(fh_phi, [0, 2, 1])) #scale - fh_attn = fh_attn / (128 ** 0.5) + fh_attn = fh_attn / (128**0.5) fh_attn = fluid.layers.softmax(fh_attn) #weighted sum fh_weight = fluid.layers.matmul(fh_attn, fh_g) - fh_weight = fluid.layers.reshape(fh_weight, [f_shape[0], f_shape[2], f_shape[3], 128]) + fh_weight = fluid.layers.reshape( + fh_weight, [f_shape[0], f_shape[2], f_shape[3], 128]) # print("fh_weight: {}".format(fh_weight.shape)) fh_weight = fluid.layers.transpose(fh_weight, [0, 3, 1, 2]) - fh_weight = conv_bn_layer(input=fh_weight, num_filters=128, filter_size=1, stride=1, name='fh_weight') + fh_weight = conv_bn_layer( + input=fh_weight, + num_filters=128, + filter_size=1, + stride=1, + name='fh_weight') #short cut - fh_sc = conv_bn_layer(input=f_common, num_filters=128, filter_size=1, stride=1, name='fh_sc') + fh_sc = conv_bn_layer( + input=f_common, + num_filters=128, + filter_size=1, + stride=1, + name='fh_sc') f_h = fluid.layers.relu(fh_weight + fh_sc) ###### #vertical @@ -174,31 +355,60 @@ class SASTHead(object): fv_g = fluid.layers.transpose(f_g, [0, 1, 3, 2]) #flatten fv_theta = fluid.layers.transpose(fv_theta, [0, 2, 3, 1]) - fv_theta = fluid.layers.reshape(fv_theta, [f_shape[0] * f_shape[3], f_shape[2], 128]) + fv_theta = fluid.layers.reshape( + fv_theta, [f_shape[0] * f_shape[3], f_shape[2], 128]) fv_phi = fluid.layers.transpose(fv_phi, [0, 2, 3, 1]) - fv_phi = fluid.layers.reshape(fv_phi, [f_shape[0] * f_shape[3], f_shape[2], 128]) + fv_phi = fluid.layers.reshape( + fv_phi, [f_shape[0] * f_shape[3], f_shape[2], 128]) fv_g = fluid.layers.transpose(fv_g, [0, 2, 3, 1]) - fv_g = fluid.layers.reshape(fv_g, [f_shape[0] * f_shape[3], f_shape[2], 128]) + fv_g = fluid.layers.reshape(fv_g, + [f_shape[0] * f_shape[3], f_shape[2], 128]) #correlation - fv_attn = fluid.layers.matmul(fv_theta, fluid.layers.transpose(fv_phi, [0, 2, 1])) + fv_attn = fluid.layers.matmul(fv_theta, + fluid.layers.transpose(fv_phi, [0, 2, 1])) #scale - fv_attn = fv_attn / (128 ** 0.5) + fv_attn = fv_attn / (128**0.5) fv_attn = fluid.layers.softmax(fv_attn) #weighted sum fv_weight = fluid.layers.matmul(fv_attn, fv_g) - fv_weight = fluid.layers.reshape(fv_weight, [f_shape[0], f_shape[3], f_shape[2], 128]) + fv_weight = fluid.layers.reshape( + fv_weight, [f_shape[0], f_shape[3], f_shape[2], 128]) # print("fv_weight: {}".format(fv_weight.shape)) fv_weight = fluid.layers.transpose(fv_weight, [0, 3, 2, 1]) - fv_weight = conv_bn_layer(input=fv_weight, num_filters=128, filter_size=1, stride=1, name='fv_weight') + fv_weight = conv_bn_layer( + input=fv_weight, + num_filters=128, + filter_size=1, + stride=1, + name='fv_weight') #short cut - fv_sc = conv_bn_layer(input=f_common, num_filters=128, filter_size=1, stride=1, name='fv_sc') + fv_sc = conv_bn_layer( + input=f_common, + num_filters=128, + filter_size=1, + stride=1, + name='fv_sc') f_v = fluid.layers.relu(fv_weight + fv_sc) ###### f_attn = fluid.layers.concat([f_h, f_v], axis=1) - f_attn = conv_bn_layer(input=f_attn, num_filters=128, filter_size=1, stride=1, act='relu', name='f_attn') + f_attn = conv_bn_layer( + input=f_attn, + num_filters=128, + filter_size=1, + stride=1, + act='relu', + name='f_attn') return f_attn - + def __call__(self, blocks, with_cab=False): + """ + Fuse different levels of feature map from backbone and predict results + Args: + blocks(list): feature maps from backbone + with_cab(bool): whether use cross_attention + Return: predicts + """ + # for k, v in blocks.items(): # print(k, v.shape) @@ -212,12 +422,12 @@ class SASTHead(object): f_common = fluid.layers.elementwise_add(x=f_down, y=f_up) f_common = fluid.layers.relu(f_common) # print("f_common: {}".format(f_common.shape)) - + if self.with_cab: # print('enhence f_common with CAB.') f_common = self.cross_attention(f_common) - - f_score, f_border= self.SAST_Header1(f_common) + + f_score, f_border = self.SAST_Header1(f_common) f_tvo, f_tco = self.SAST_Header2(f_common) predicts = OrderedDict() @@ -225,4 +435,4 @@ class SASTHead(object): predicts['f_border'] = f_border predicts['f_tvo'] = f_tvo predicts['f_tco'] = f_tco - return predicts \ No newline at end of file + return predicts