From 4d44b23043063202852d157deb4206c2f91dcf29 Mon Sep 17 00:00:00 2001 From: WenmuZhou Date: Thu, 12 Nov 2020 12:06:46 +0800 Subject: [PATCH 1/4] =?UTF-8?q?=E8=AF=86=E5=88=AB=E6=A8=A1=E5=9E=8B?= =?UTF-8?q?=E5=AF=BC=E5=87=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tools/program.py | 16 +++++++++++++++- tools/train.py | 7 ++++--- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/tools/program.py b/tools/program.py index 8bae0fd5..c2b9306c 100755 --- a/tools/program.py +++ b/tools/program.py @@ -323,6 +323,20 @@ def eval(model, valid_dataloader, post_process_class, eval_class): return metirc +def save_inference_mode(model, config, logger): + model.eval() + save_path = '{}/infer/{}'.format(config['Global']['save_model_dir'], + config['Architecture']['model_type']) + if config['Architecture']['model_type'] == 'rec': + input_shape = [None, 3, 32, None] + jit_model = paddle.jit.to_static( + model, input_spec=[paddle.static.InputSpec(input_shape)]) + paddle.jit.save(jit_model, save_path) + logger.info('inference model save to {}'.format(save_path)) + + model.train() + + def preprocess(): FLAGS = ArgsParser().parse_args() config = load_config(FLAGS.config) @@ -334,7 +348,7 @@ def preprocess(): alg = config['Architecture']['algorithm'] assert alg in [ - 'EAST', 'DB', 'SAST', 'Rosetta', 'CRNN', 'STARNet', 'RARE', 'SRN' + 'EAST', 'DB', 'SAST', 'Rosetta', 'CRNN', 'STARNet', 'RARE', 'SRN', 'CLS' ] device = 'gpu:{}'.format(dist.ParallelEnv().dev_id) if use_gpu else 'cpu' diff --git a/tools/train.py b/tools/train.py index c1622379..1cf644e6 100755 --- a/tools/train.py +++ b/tools/train.py @@ -89,6 +89,7 @@ def main(config, device, logger, vdl_writer): program.train(config, train_dataloader, valid_dataloader, device, model, loss_class, optimizer, lr_scheduler, post_process_class, eval_class, pre_best_model_dict, logger, vdl_writer) + program.save_inference_mode(model, config, logger) def test_reader(config, device, logger): @@ -102,8 +103,8 @@ def test_reader(config, device, logger): if count % 1 == 0: batch_time = time.time() - starttime starttime = time.time() - logger.info("reader: {}, {}, {}".format(count, - len(data), batch_time)) + logger.info("reader: {}, {}, {}".format( + count, len(data[0]), batch_time)) except Exception as e: logger.info(e) logger.info("finish reader: {}, Success!".format(count)) @@ -112,4 +113,4 @@ def test_reader(config, device, logger): if __name__ == '__main__': config, device, logger, vdl_writer = program.preprocess() main(config, device, logger, vdl_writer) -# test_reader(config, device, logger) + # test_reader(config, device, logger) From 453c6f68bd1f0b8470ffd3a2072d9483750808e1 Mon Sep 17 00:00:00 2001 From: WenmuZhou Date: Thu, 12 Nov 2020 12:07:41 +0800 Subject: [PATCH 2/4] =?UTF-8?q?=E8=AF=86=E5=88=AB=E6=A8=A1=E5=9E=8Binferen?= =?UTF-8?q?ce?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tools/infer/predict_rec.py | 76 +++++++++----------------------------- 1 file changed, 18 insertions(+), 58 deletions(-) diff --git a/tools/infer/predict_rec.py b/tools/infer/predict_rec.py index 6a379853..a55f671e 100755 --- a/tools/infer/predict_rec.py +++ b/tools/infer/predict_rec.py @@ -26,34 +26,27 @@ import time import paddle.fluid as fluid import tools.infer.utility as utility -from ppocr.utils.utility import initial_logger -logger = initial_logger() +from ppocr.postprocess import build_post_process +from ppocr.utils.logging import get_logger from ppocr.utils.utility import get_image_file_list, check_and_read_gif -from ppocr.utils.character import CharacterOps class TextRecognizer(object): def __init__(self, args): - self.predictor, self.input_tensor, self.output_tensors =\ - utility.create_predictor(args, mode="rec") self.rec_image_shape = [int(v) for v in args.rec_image_shape.split(",")] self.character_type = args.rec_char_type self.rec_batch_num = args.rec_batch_num self.rec_algorithm = args.rec_algorithm self.use_zero_copy_run = args.use_zero_copy_run - char_ops_params = { + postprocess_params = { + 'name': 'CTCLabelDecode', "character_type": args.rec_char_type, "character_dict_path": args.rec_char_dict_path, - "use_space_char": args.use_space_char, - "max_text_length": args.max_text_length + "use_space_char": args.use_space_char } - if self.rec_algorithm != "RARE": - char_ops_params['loss_type'] = 'ctc' - self.loss_type = 'ctc' - else: - char_ops_params['loss_type'] = 'attention' - self.loss_type = 'attention' - self.char_ops = CharacterOps(char_ops_params) + self.postprocess_op = build_post_process(postprocess_params) + self.predictor, self.input_tensor, self.output_tensors = \ + utility.create_predictor(args, 'rec', logger) def resize_norm_img(self, img, max_wh_ratio): imgC, imgH, imgW = self.rec_image_shape @@ -112,48 +105,14 @@ class TextRecognizer(object): else: norm_img_batch = fluid.core.PaddleTensor(norm_img_batch) self.predictor.run([norm_img_batch]) - - if self.loss_type == "ctc": - rec_idx_batch = self.output_tensors[0].copy_to_cpu() - rec_idx_lod = self.output_tensors[0].lod()[0] - predict_batch = self.output_tensors[1].copy_to_cpu() - predict_lod = self.output_tensors[1].lod()[0] - elapse = time.time() - starttime - predict_time += elapse - for rno in range(len(rec_idx_lod) - 1): - beg = rec_idx_lod[rno] - end = rec_idx_lod[rno + 1] - rec_idx_tmp = rec_idx_batch[beg:end, 0] - preds_text = self.char_ops.decode(rec_idx_tmp) - beg = predict_lod[rno] - end = predict_lod[rno + 1] - probs = predict_batch[beg:end, :] - ind = np.argmax(probs, axis=1) - blank = probs.shape[1] - valid_ind = np.where(ind != (blank - 1))[0] - if len(valid_ind) == 0: - continue - score = np.mean(probs[valid_ind, ind[valid_ind]]) - # rec_res.append([preds_text, score]) - rec_res[indices[beg_img_no + rno]] = [preds_text, score] - else: - rec_idx_batch = self.output_tensors[0].copy_to_cpu() - predict_batch = self.output_tensors[1].copy_to_cpu() - elapse = time.time() - starttime - predict_time += elapse - for rno in range(len(rec_idx_batch)): - end_pos = np.where(rec_idx_batch[rno, :] == 1)[0] - if len(end_pos) <= 1: - preds = rec_idx_batch[rno, 1:] - score = np.mean(predict_batch[rno, 1:]) - else: - preds = rec_idx_batch[rno, 1:end_pos[1]] - score = np.mean(predict_batch[rno, 1:end_pos[1]]) - preds_text = self.char_ops.decode(preds) - # rec_res.append([preds_text, score]) - rec_res[indices[beg_img_no + rno]] = [preds_text, score] - - return rec_res, predict_time + outputs = [] + for output_tensor in self.output_tensors: + output = output_tensor.copy_to_cpu() + outputs.append(output) + preds = outputs[0] + rec_res = self.postprocess_op(preds) + elapse = time.time() - starttime + return rec_res, elapse def main(args): @@ -183,9 +142,10 @@ def main(args): exit() for ino in range(len(img_list)): print("Predicts of %s:%s" % (valid_image_file_list[ino], rec_res[ino])) - print("Total predict time for %d images:%.3f" % + print("Total predict time for %d images, cost: %.3f" % (len(img_list), predict_time)) if __name__ == "__main__": + logger = get_logger() main(utility.parse_args()) From d1affce65afcded75f6b8cba7d77fea82169c1f1 Mon Sep 17 00:00:00 2001 From: WenmuZhou Date: Thu, 12 Nov 2020 17:14:33 +0800 Subject: [PATCH 3/4] delete fluid --- ppocr/modeling/heads/rec_ctc_head.py | 3 ++- ppocr/optimizer/regularizer.py | 8 +++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/ppocr/modeling/heads/rec_ctc_head.py b/ppocr/modeling/heads/rec_ctc_head.py index 27c8c7c7..69d4ef50 100755 --- a/ppocr/modeling/heads/rec_ctc_head.py +++ b/ppocr/modeling/heads/rec_ctc_head.py @@ -24,7 +24,7 @@ from paddle.nn import functional as F def get_para_bias_attr(l2_decay, k, name): - regularizer = paddle.fluid.regularizer.L2Decay(l2_decay) + regularizer = paddle.regularizer.L2Decay(l2_decay) stdv = 1.0 / math.sqrt(k * 1.0) initializer = nn.initializer.Uniform(-stdv, stdv) weight_attr = ParamAttr( @@ -33,6 +33,7 @@ def get_para_bias_attr(l2_decay, k, name): regularizer=regularizer, initializer=initializer, name=name + "_b_attr") return [weight_attr, bias_attr] + class CTCHead(nn.Layer): def __init__(self, in_channels, out_channels, fc_decay=0.0004, **kwargs): super(CTCHead, self).__init__() diff --git a/ppocr/optimizer/regularizer.py b/ppocr/optimizer/regularizer.py index 8ac1b81f..c6396f33 100644 --- a/ppocr/optimizer/regularizer.py +++ b/ppocr/optimizer/regularizer.py @@ -17,7 +17,7 @@ from __future__ import division from __future__ import print_function from __future__ import unicode_literals -from paddle import fluid +import paddle class L1Decay(object): @@ -32,8 +32,7 @@ class L1Decay(object): self.regularization_coeff = factor def __call__(self): - reg = fluid.regularizer.L1Decay( - regularization_coeff=self.regularization_coeff) + reg = paddle.regularizer.L1Decay(self.regularization_coeff) return reg @@ -49,6 +48,5 @@ class L2Decay(object): self.regularization_coeff = factor def __call__(self): - reg = fluid.regularizer.L2Decay( - regularization_coeff=self.regularization_coeff) + reg = paddle.regularizer.L2Decay(self.regularization_coeff) return reg From d4facfe4e591db71193b69e3e1b32e663c64100c Mon Sep 17 00:00:00 2001 From: WenmuZhou Date: Fri, 13 Nov 2020 17:01:41 +0800 Subject: [PATCH 4/4] delete fluid --- ppocr/postprocess/rec_postprocess.py | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/ppocr/postprocess/rec_postprocess.py b/ppocr/postprocess/rec_postprocess.py index 03208227..19129a54 100644 --- a/ppocr/postprocess/rec_postprocess.py +++ b/ppocr/postprocess/rec_postprocess.py @@ -102,7 +102,6 @@ class CTCLabelDecode(BaseRecLabelDecode): def __call__(self, preds, label=None, *args, **kwargs): if isinstance(preds, paddle.Tensor): preds = preds.numpy() - # out = self.decode_preds(preds) preds_idx = preds.argmax(axis=2) preds_prob = preds.max(axis=2) @@ -116,27 +115,6 @@ class CTCLabelDecode(BaseRecLabelDecode): dict_character = ['blank'] + dict_character return dict_character - def decode_preds(self, preds): - probs_ind = np.argmax(preds, axis=2) - - B, N, _ = preds.shape - l = np.ones(B).astype(np.int64) * N - length = paddle.to_tensor(l) - out = paddle.fluid.layers.ctc_greedy_decoder(preds, 0, length) - batch_res = [ - x[:idx[0]] for x, idx in zip(out[0].numpy(), out[1].numpy()) - ] - - result_list = [] - for sample_idx, ind, prob in zip(batch_res, probs_ind, preds): - char_list = [self.character[idx] for idx in sample_idx] - valid_ind = np.where(ind != 0)[0] - if len(valid_ind) == 0: - continue - conf_list = prob[valid_ind, ind[valid_ind]] - result_list.append((''.join(char_list), conf_list)) - return result_list - class AttnLabelDecode(BaseRecLabelDecode): """ Convert between text-label and text-index """