polish srn anno

2020-10-28 15:59:53 +08:00 · 2020-10-28 15:59:53 +08:00 · fa12cf0b6d
parent 234bb38c8a
commit fa12cf0b6d
1 changed files with 32 additions and 0 deletions
--- a/ppocr/modeling/heads/rec_srn_all_head.py
+++ b/ppocr/modeling/heads/rec_srn_all_head.py
@ -28,6 +28,13 @@ gradient_clip = 10
 class SRNPredict(object):
    """
    SRN:
        see arxiv: https://arxiv.org/abs/2003.12294
    args:
        params(dict): the hyperparameters used to build the network
    """
    def __init__(self, params):
        super(SRNPredict, self).__init__()
        self.char_num = params['char_num']
@ -39,7 +46,15 @@ class SRNPredict(object):
        self.hidden_dims = params['hidden_dims']
    def pvam(self, inputs, others):
        """
        Parallel Visual Attention Module (PVAM)
        args:
            inputs(variable):  Feature map extracted from backbone network
            others(list):  Other location information variables
        return: pvam_features
        """
        b, c, h, w = inputs.shape
        conv_features = fluid.layers.reshape(x=inputs, shape=[-1, c, h * w])
        conv_features = fluid.layers.transpose(x=conv_features, perm=[0, 2, 1])
@ -98,6 +113,15 @@ class SRNPredict(object):
        return pvam_features
    def gsrm(self, pvam_features, others):
        """
        Global Semantic Reasoning Module
        args:
            pvam_features(variable):  Feature map extracted from pvam
            others(list):  Other location information variables
        return: gsrm_features, word_out, gsrm_out
        """
        #===== GSRM Visual-to-semantic embedding block =====
        b, t, c = pvam_features.shape
@ -190,7 +214,15 @@ class SRNPredict(object):
        return gsrm_features, word_out, gsrm_out
    def vsfd(self, pvam_features, gsrm_features):
        """
        Visual-Semantic Fusion Decoder Module
        args:
            pvam_features(variable):  Feature map extracted from pvam
            gsrm_features(variable):  Feature map extracted from gsrm
        return: fc_out
        """
        #===== Visual-Semantic Fusion Decoder Module =====
        b, t, c1 = pvam_features.shape
        b, t, c2 = gsrm_features.shape