diff --git a/tools/infer/det_program.txt b/tools/infer/det_program.txt deleted file mode 100644 index 0da7070f..00000000 --- a/tools/infer/det_program.txt +++ /dev/null @@ -1 +0,0 @@ - diff --git a/tools/tmp/eval_det.py b/tools/tmp/eval_det.py deleted file mode 100755 index 38929f26..00000000 --- a/tools/tmp/eval_det.py +++ /dev/null @@ -1,134 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import sys -import time -import numpy as np -from copy import deepcopy -import json - -# from paddle.fluid.contrib.model_stat import summary - - -def set_paddle_flags(**kwargs): - for key, value in kwargs.items(): - if os.environ.get(key, None) is None: - os.environ[key] = str(value) - - -# NOTE(paddle-dev): All of these flags should be -# set before `import paddle`. Otherwise, it would -# not take any effect. -set_paddle_flags( - FLAGS_eager_delete_tensor_gb=0, # enable GC to save memory -) - -from paddle import fluid -from ppocr.utils.utility import create_module -from ppocr.utils.utility import load_config, merge_config -import ppocr.data.det.reader_main as reader -from ppocr.utils.utility import ArgsParser -from ppocr.utils.check import check_gpu -from ppocr.utils.checkpoint import load_pretrain, load_checkpoint, save, save_model - -from ppocr.utils.utility import initial_logger -logger = initial_logger() -from ppocr.utils.eval_utils import eval_det_run - - -def draw_det_res(dt_boxes, config, img_name, ino): - if len(dt_boxes) > 0: - img_set_path = config['TestReader']['img_set_dir'] - img_path = img_set_path + img_name - import cv2 - src_im = cv2.imread(img_path) - for box in dt_boxes: - box = box.astype(np.int32).reshape((-1, 1, 2)) - cv2.polylines(src_im, [box], True, color=(255, 255, 0), thickness=2) - cv2.imwrite("tmp%d.jpg" % ino, src_im) - - -def main(): - config = load_config(FLAGS.config) - merge_config(FLAGS.opt) - print(config) - - # check if set use_gpu=True in paddlepaddle cpu version - use_gpu = config['Global']['use_gpu'] - check_gpu(use_gpu) - - place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace() - exe = fluid.Executor(place) - - det_model = create_module(config['Architecture']['function'])(params=config) - - startup_prog = fluid.Program() - eval_prog = fluid.Program() - with fluid.program_guard(eval_prog, startup_prog): - with fluid.unique_name.guard(): - eval_loader, eval_outputs = det_model(mode="test") - eval_fetch_list = [v.name for v in eval_outputs] - eval_prog = eval_prog.clone(for_test=True) - exe.run(startup_prog) - - pretrain_weights = config['Global']['pretrain_weights'] - if pretrain_weights is not None: - load_pretrain(exe, eval_prog, pretrain_weights) -# fluid.load(eval_prog, pretrain_weights) -# def if_exist(var): -# return os.path.exists(os.path.join(pretrain_weights, var.name)) -# fluid.io.load_vars(exe, pretrain_weights, predicate=if_exist, main_program=eval_prog) - else: - logger.info("Not find pretrain_weights:%s" % pretrain_weights) - sys.exit(0) - -# fluid.io.save_inference_model("./output/", feeded_var_names=['image'], -# target_vars=eval_outputs, executor=exe, main_program=eval_prog, -# model_filename="model", params_filename="params") -# sys.exit(-1) - - metrics = eval_det_run(exe, eval_prog, eval_fetch_list, config, "test") - logger.info("metrics:{}".format(metrics)) - logger.info("success!") - - -def test_reader(): - config = load_config(FLAGS.config) - merge_config(FLAGS.opt) - print(config) - tmp_reader = reader.test_reader(config=config) - count = 0 - print_count = 0 - import time - starttime = time.time() - for data in tmp_reader(): - count += len(data) - print_count += 1 - if print_count % 10 == 0: - batch_time = (time.time() - starttime) / print_count - print("reader:", count, len(data), batch_time) - print("finish reader:", count) - print("success") - - -if __name__ == '__main__': - parser = ArgsParser() - FLAGS = parser.parse_args() - main() -# test_reader() diff --git a/tools/tmp/infer_det.py b/tools/tmp/infer_det.py deleted file mode 100755 index 21c2e1cc..00000000 --- a/tools/tmp/infer_det.py +++ /dev/null @@ -1,160 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import sys -import time -import numpy as np -from copy import deepcopy -import json - -# from paddle.fluid.contrib.model_stat import summary - - -def set_paddle_flags(**kwargs): - for key, value in kwargs.items(): - if os.environ.get(key, None) is None: - os.environ[key] = str(value) - - -# NOTE(paddle-dev): All of these flags should be -# set before `import paddle`. Otherwise, it would -# not take any effect. -set_paddle_flags( - FLAGS_eager_delete_tensor_gb=0, # enable GC to save memory -) - -from paddle import fluid -from ppocr.utils.utility import create_module -from ppocr.utils.utility import load_config, merge_config -import ppocr.data.det.reader_main as reader -from ppocr.utils.utility import ArgsParser -from ppocr.utils.check import check_gpu -from ppocr.utils.checkpoint import load_pretrain, load_checkpoint, save, save_model - -from ppocr.utils.utility import initial_logger -logger = initial_logger() -from ppocr.utils.eval_utils import eval_det_run - - -def draw_det_res(dt_boxes, config, img_name, ino): - if len(dt_boxes) > 0: - img_set_path = config['TestReader']['img_set_dir'] - img_path = img_set_path + img_name - import cv2 - src_im = cv2.imread(img_path) - for box in dt_boxes: - box = box.astype(np.int32).reshape((-1, 1, 2)) - cv2.polylines(src_im, [box], True, color=(255, 255, 0), thickness=2) - cv2.imwrite("tmp%d.jpg" % ino, src_im) - - -def main(): - config = load_config(FLAGS.config) - merge_config(FLAGS.opt) - print(config) - - # check if set use_gpu=True in paddlepaddle cpu version - use_gpu = config['Global']['use_gpu'] - check_gpu(use_gpu) - - place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace() - exe = fluid.Executor(place) - - det_model = create_module(config['Architecture']['function'])(params=config) - - startup_prog = fluid.Program() - eval_prog = fluid.Program() - with fluid.program_guard(eval_prog, startup_prog): - with fluid.unique_name.guard(): - eval_outputs = det_model(mode="test") - eval_fetch_list = [v.name for v in eval_outputs] - eval_prog = eval_prog.clone(for_test=True) - exe.run(startup_prog) - - pretrain_weights = config['Global']['pretrain_weights'] - if pretrain_weights is not None: - fluid.load(eval_prog, pretrain_weights) - else: - logger.info("Not find pretrain_weights:%s" % pretrain_weights) - sys.exit(0) - - save_res_path = config['Global']['save_res_path'] - with open(save_res_path, "wb") as fout: - test_reader = reader.test_reader(config=config) - tackling_num = 0 - for data in test_reader(): - img_num = len(data) - tackling_num = tackling_num + img_num - logger.info("tackling_num:%d", tackling_num) - img_list = [] - ratio_list = [] - img_name_list = [] - for ino in range(img_num): - img_list.append(data[ino][0]) - ratio_list.append(data[ino][1]) - img_name_list.append(data[ino][2]) - img_list = np.concatenate(img_list, axis=0) - outs = exe.run(eval_prog,\ - feed={'image': img_list},\ - fetch_list=eval_fetch_list) - - global_params = config['Global'] - postprocess_params = deepcopy(config["PostProcess"]) - postprocess_params.update(global_params) - postprocess = create_module(postprocess_params['function'])\ - (params=postprocess_params) - dt_boxes_list = postprocess(outs, ratio_list) - for ino in range(img_num): - dt_boxes = dt_boxes_list[ino] - img_name = img_name_list[ino] - dt_boxes_json = [] - for box in dt_boxes: - tmp_json = {"transcription": ""} - tmp_json['points'] = box.tolist() - dt_boxes_json.append(tmp_json) - otstr = img_name + "\t" + json.dumps(dt_boxes_json) + "\n" - fout.write(otstr.encode()) - #draw_det_res(dt_boxes, config, img_name, ino) - logger.info("success!") - - -def test_reader(): - config = load_config(FLAGS.config) - merge_config(FLAGS.opt) - print(config) - tmp_reader = reader.test_reader(config=config) - count = 0 - print_count = 0 - import time - starttime = time.time() - for data in tmp_reader(): - count += len(data) - print_count += 1 - if print_count % 10 == 0: - batch_time = (time.time() - starttime) / print_count - print("reader:", count, len(data), batch_time) - print("finish reader:", count) - print("success") - - -if __name__ == '__main__': - parser = ArgsParser() - FLAGS = parser.parse_args() - main() -# test_reader() diff --git a/tools/tmp/infer_rec.py b/tools/tmp/infer_rec.py deleted file mode 100755 index ea308953..00000000 --- a/tools/tmp/infer_rec.py +++ /dev/null @@ -1,116 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import time -import multiprocessing -import numpy as np - - -def set_paddle_flags(**kwargs): - for key, value in kwargs.items(): - if os.environ.get(key, None) is None: - os.environ[key] = str(value) - - -# NOTE(paddle-dev): All of these flags should be -# set before `import paddle`. Otherwise, it would -# not take any effect. -set_paddle_flags( - FLAGS_eager_delete_tensor_gb=0, # enable GC to save memory -) - -from paddle import fluid - -from ppocr.utils.utility import load_config, merge_config -from ppocr.data.rec.reader_main import test_reader - -from ppocr.utils.utility import ArgsParser -from ppocr.utils.character import CharacterOps, cal_predicts_accuracy -from ppocr.utils.check import check_gpu -from ppocr.utils.utility import create_module - -from ppocr.utils.utility import initial_logger -logger = initial_logger() - - -def main(): - config = load_config(FLAGS.config) - merge_config(FLAGS.opt) - char_ops = CharacterOps(config['Global']) - config['Global']['char_num'] = char_ops.get_char_num() - - # check if set use_gpu=True in paddlepaddle cpu version - use_gpu = config['Global']['use_gpu'] - check_gpu(use_gpu) - - place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace() - exe = fluid.Executor(place) - - rec_model = create_module(config['Architecture']['function'])(params=config) - - startup_prog = fluid.Program() - eval_prog = fluid.Program() - with fluid.program_guard(eval_prog, startup_prog): - with fluid.unique_name.guard(): - eval_outputs = rec_model(mode="test") - eval_fetch_list = [v.name for v in eval_outputs] - eval_prog = eval_prog.clone(for_test=True) - exe.run(startup_prog) - - pretrain_weights = config['Global']['pretrain_weights'] - if pretrain_weights is not None: - fluid.load(eval_prog, pretrain_weights) - - test_img_path = config['test_img_path'] - image_shape = config['Global']['image_shape'] - blobs = test_reader(image_shape, test_img_path) - predict = exe.run(program=eval_prog, - feed={"image": blobs}, - fetch_list=eval_fetch_list, - return_numpy=False) - preds = np.array(predict[0]) - if preds.shape[1] == 1: - preds = preds.reshape(-1) - preds_lod = predict[0].lod()[0] - preds_text = char_ops.decode(preds) - else: - end_pos = np.where(preds[0, :] == 1)[0] - if len(end_pos) <= 1: - preds_text = preds[0, 1:] - else: - preds_text = preds[0, 1:end_pos[1]] - preds_text = preds_text.reshape(-1) - preds_text = char_ops.decode(preds_text) - - fluid.io.save_inference_model( - "./output/", - feeded_var_names=['image'], - target_vars=eval_outputs, - executor=exe, - main_program=eval_prog, - model_filename="model", - params_filename="params") - print(preds) - print(preds_text) - - -if __name__ == '__main__': - parser = ArgsParser() - FLAGS = parser.parse_args() - main() diff --git a/tools/tmp/test_rec_benchmark.py b/tools/tmp/test_rec_benchmark.py deleted file mode 100755 index 33cd136c..00000000 --- a/tools/tmp/test_rec_benchmark.py +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import time -import multiprocessing -import numpy as np - - -def set_paddle_flags(**kwargs): - for key, value in kwargs.items(): - if os.environ.get(key, None) is None: - os.environ[key] = str(value) - - -# NOTE(paddle-dev): All of these flags should be -# set before `import paddle`. Otherwise, it would -# not take any effect. -set_paddle_flags( - FLAGS_eager_delete_tensor_gb=0, # enable GC to save memory -) - -from paddle import fluid - -from ppocr.utils.utility import load_config, merge_config -import ppocr.data.rec.reader_main as reader - -from ppocr.utils.utility import ArgsParser -from ppocr.utils.character import CharacterOps, cal_predicts_accuracy -from ppocr.utils.check import check_gpu -from ppocr.utils.utility import create_module - -from ppocr.utils.eval_utils import eval_run - -from ppocr.utils.utility import initial_logger -logger = initial_logger() - - -def main(): - config = load_config(FLAGS.config) - merge_config(FLAGS.opt) - char_ops = CharacterOps(config['Global']) - config['Global']['char_num'] = char_ops.get_char_num() - - # check if set use_gpu=True in paddlepaddle cpu version - use_gpu = config['Global']['use_gpu'] - check_gpu(use_gpu) - - if use_gpu: - devices_num = fluid.core.get_cuda_device_count() - else: - devices_num = int( - os.environ.get('CPU_NUM', multiprocessing.cpu_count())) - - place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace() - exe = fluid.Executor(place) - - rec_model = create_module(config['Architecture']['function'])(params=config) - - startup_prog = fluid.Program() - eval_prog = fluid.Program() - with fluid.program_guard(eval_prog, startup_prog): - with fluid.unique_name.guard(): - eval_loader, eval_outputs = rec_model(mode="eval") - eval_fetch_list = [v.name for v in eval_outputs] - eval_prog = eval_prog.clone(for_test=True) - - exe.run(startup_prog) - pretrain_weights = config['Global']['pretrain_weights'] - if pretrain_weights is not None: - fluid.load(eval_prog, pretrain_weights) - - eval_data_list = ['IIIT5k_3000', 'SVT', 'IC03_860', 'IC03_867',\ - 'IC13_857', 'IC13_1015', 'IC15_1811', 'IC15_2077', 'SVTP', 'CUTE80'] - eval_data_dir = config['TestReader']['lmdb_sets_dir'] - total_forward_time = 0 - total_evaluation_data_number = 0 - total_correct_number = 0 - eval_data_acc_info = {} - for eval_data in eval_data_list: - config['TestReader']['lmdb_sets_dir'] = \ - eval_data_dir + "/" + eval_data - eval_reader = reader.train_eval_reader( - config=config, char_ops=char_ops, mode="test") - eval_loader.set_sample_list_generator(eval_reader, places=place) - - start_time = time.time() - outs = eval_run(exe, eval_prog, eval_loader, eval_fetch_list, char_ops, - "best", "test") - infer_time = time.time() - start_time - eval_acc, acc_num, sample_num = outs - total_forward_time += infer_time - total_evaluation_data_number += sample_num - total_correct_number += acc_num - eval_data_acc_info[eval_data] = outs - - avg_forward_time = total_forward_time / total_evaluation_data_number - avg_acc = total_correct_number * 1.0 / total_evaluation_data_number - logger.info('-' * 50) - strs = "" - for eval_data in eval_data_list: - eval_acc, acc_num, sample_num = eval_data_acc_info[eval_data] - strs += "\n {}, accuracy:{:.6f}".format(eval_data, eval_acc) - strs += "\n average, accuracy:{:.6f}, time:{:.6f}".format(avg_acc, - avg_forward_time) - logger.info(strs) - logger.info('-' * 50) - - -if __name__ == '__main__': - parser = ArgsParser() - FLAGS = parser.parse_args() - main() diff --git a/tools/tmp/train_det.py b/tools/tmp/train_det.py deleted file mode 100755 index bfa2f632..00000000 --- a/tools/tmp/train_det.py +++ /dev/null @@ -1,216 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import sys -import time -import multiprocessing -import numpy as np - -# from paddle.fluid.contrib.model_stat import summary - - -def set_paddle_flags(**kwargs): - for key, value in kwargs.items(): - if os.environ.get(key, None) is None: - os.environ[key] = str(value) - - -# NOTE(paddle-dev): All of these flags should be -# set before `import paddle`. Otherwise, it would -# not take any effect. -set_paddle_flags( - FLAGS_eager_delete_tensor_gb=0, # enable GC to save memory -) - -from paddle import fluid -from ppocr.utils.utility import create_module -from ppocr.utils.utility import load_config, merge_config -import ppocr.data.det.reader_main as reader -from ppocr.utils.utility import ArgsParser -from ppocr.utils.character import CharacterOps, cal_predicts_accuracy -from ppocr.utils.check import check_gpu -from ppocr.utils.stats import TrainingStats -from ppocr.utils.checkpoint import load_pretrain, load_checkpoint, save, save_model -from ppocr.utils.eval_utils import eval_run -from ppocr.utils.eval_utils import eval_det_run - -from ppocr.utils.utility import initial_logger -logger = initial_logger() -from ppocr.utils.utility import create_multi_devices_program - - -def main(): - config = load_config(FLAGS.config) - merge_config(FLAGS.opt) - print(config) - - alg = config['Global']['algorithm'] - assert alg in ['EAST', 'DB'] - - # check if set use_gpu=True in paddlepaddle cpu version - use_gpu = config['Global']['use_gpu'] - check_gpu(use_gpu) - - place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace() - exe = fluid.Executor(place) - - det_model = create_module(config['Architecture']['function'])(params=config) - - startup_prog = fluid.Program() - train_prog = fluid.Program() - with fluid.program_guard(train_prog, startup_prog): - with fluid.unique_name.guard(): - train_loader, train_outputs = det_model(mode="train") - train_fetch_list = [v.name for v in train_outputs] - train_loss = train_outputs[0] - opt_params = config['Optimizer'] - optimizer = create_module(opt_params['function'])(opt_params) - optimizer.minimize(train_loss) - global_lr = optimizer._global_learning_rate() - global_lr.persistable = True - train_fetch_list.append(global_lr.name) - - eval_prog = fluid.Program() - with fluid.program_guard(eval_prog, startup_prog): - with fluid.unique_name.guard(): - eval_loader, eval_outputs = det_model(mode="eval") - eval_fetch_list = [v.name for v in eval_outputs] - eval_prog = eval_prog.clone(for_test=True) - - train_reader = reader.train_reader(config=config) - train_loader.set_sample_list_generator(train_reader, places=place) - - exe.run(startup_prog) - - # compile program for multi-devices - train_compile_program = create_multi_devices_program(train_prog, - train_loss.name) - - pretrain_weights = config['Global']['pretrain_weights'] - if pretrain_weights is not None: - load_pretrain(exe, train_prog, pretrain_weights) - print("pretrain weights loaded!") - - train_batch_id = 0 - if alg == 'EAST': - train_log_keys = ['loss_total', 'loss_cls', 'loss_offset'] - elif alg == 'DB': - train_log_keys = [ - 'loss_total', 'loss_shrink', 'loss_threshold', 'loss_binary' - ] - log_smooth_window = config['Global']['log_smooth_window'] - epoch_num = config['Global']['epoch_num'] - print_step = config['Global']['print_step'] - eval_step = config['Global']['eval_step'] - save_epoch_step = config['Global']['save_epoch_step'] - save_dir = config['Global']['save_dir'] - train_stats = TrainingStats(log_smooth_window, train_log_keys) - best_eval_hmean = -1 - best_batch_id = 0 - best_epoch = 0 - for epoch in range(epoch_num): - train_loader.start() - try: - while True: - t1 = time.time() - train_outs = exe.run(program=train_compile_program, - fetch_list=train_fetch_list, - return_numpy=False) - loss_total = np.mean(np.array(train_outs[0])) - if alg == 'EAST': - loss_cls = np.mean(np.array(train_outs[1])) - loss_offset = np.mean(np.array(train_outs[2])) - stats = {'loss_total':loss_total, 'loss_cls':loss_cls,\ - 'loss_offset':loss_offset} - elif alg == 'DB': - loss_shrink_maps = np.mean(np.array(train_outs[1])) - loss_threshold_maps = np.mean(np.array(train_outs[2])) - loss_binary_maps = np.mean(np.array(train_outs[3])) - stats = {'loss_total':loss_total, 'loss_shrink':loss_shrink_maps, \ - 'loss_threshold':loss_threshold_maps, 'loss_binary':loss_binary_maps} - lr = np.mean(np.array(train_outs[-1])) - t2 = time.time() - train_batch_elapse = t2 - t1 - - # stats = {'loss_total':loss_total, 'loss_cls':loss_cls,\ - # 'loss_offset':loss_offset} - train_stats.update(stats) - if train_batch_id > 0 and train_batch_id % print_step == 0: - logs = train_stats.log() - strs = 'epoch: {}, iter: {}, lr: {:.6f}, {}, time: {:.3f}'.format( - epoch, train_batch_id, lr, logs, train_batch_elapse) - logger.info(strs) - - if train_batch_id > 0 and\ - train_batch_id % eval_step == 0: - metrics = eval_det_run(exe, eval_prog, eval_fetch_list, - config, "eval") - hmean = metrics['hmean'] - if hmean >= best_eval_hmean: - best_eval_hmean = hmean - best_batch_id = train_batch_id - best_epoch = epoch - save_path = save_dir + "/best_accuracy" - save_model(train_prog, save_path) - strs = 'Test iter: {}, metrics:{}, best_hmean:{:.6f}, best_epoch:{}, best_batch_id:{}'.format( - train_batch_id, metrics, best_eval_hmean, best_epoch, - best_batch_id) - logger.info(strs) - train_batch_id += 1 - - except fluid.core.EOFException: - train_loader.reset() - - if epoch > 0 and epoch % save_epoch_step == 0: - save_path = save_dir + "/iter_epoch_%d" % (epoch) - save_model(train_prog, save_path) - - -def test_reader(): - config = load_config(FLAGS.config) - merge_config(FLAGS.opt) - print(config) - tmp_reader = reader.train_reader(config=config) - count = 0 - print_count = 0 - import time - while True: - starttime = time.time() - count = 0 - for data in tmp_reader(): - count += 1 - if print_count % 1 == 0: - batch_time = time.time() - starttime - starttime = time.time() - print("reader:", count, len(data), batch_time) - print("finish reader:", count) - print("success") - - -if __name__ == '__main__': - parser = ArgsParser() - parser.add_argument( - "-r", - "--resume_checkpoint", - default=None, - type=str, - help="Checkpoint path for resuming training.") - FLAGS = parser.parse_args() - main() - # test_reader() diff --git a/tools/tmp/train_rec.py b/tools/tmp/train_rec.py deleted file mode 100755 index 21b9a9ca..00000000 --- a/tools/tmp/train_rec.py +++ /dev/null @@ -1,222 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import sys -import time -import multiprocessing -import numpy as np - -# from paddle.fluid.contrib.model_stat import summary - - -def set_paddle_flags(**kwargs): - for key, value in kwargs.items(): - if os.environ.get(key, None) is None: - os.environ[key] = str(value) - - -# NOTE(paddle-dev): All of these flags should be -# set before `import paddle`. Otherwise, it would -# not take any effect. -set_paddle_flags( - FLAGS_eager_delete_tensor_gb=0, # enable GC to save memory -) - -from paddle import fluid -from ppocr.utils.utility import create_module -from ppocr.utils.utility import load_config, merge_config -import ppocr.data.rec.reader_main as reader -from ppocr.utils.utility import ArgsParser -from ppocr.utils.character import CharacterOps, cal_predicts_accuracy -from ppocr.utils.check import check_gpu -from ppocr.utils.stats import TrainingStats -from ppocr.utils.checkpoint import load_pretrain, load_checkpoint, save, save_model -from ppocr.utils.eval_utils import eval_run - -from ppocr.utils.utility import initial_logger -logger = initial_logger() -from ppocr.utils.utility import create_multi_devices_program - - -def main(): - config = load_config(FLAGS.config) - merge_config(FLAGS.opt) - char_ops = CharacterOps(config['Global']) - config['Global']['char_num'] = char_ops.get_char_num() - print(config) - - # check if set use_gpu=True in paddlepaddle cpu version - use_gpu = config['Global']['use_gpu'] - check_gpu(use_gpu) - - place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace() - exe = fluid.Executor(place) - - rec_model = create_module(config['Architecture']['function'])(params=config) - - startup_prog = fluid.Program() - train_prog = fluid.Program() - with fluid.program_guard(train_prog, startup_prog): - with fluid.unique_name.guard(): - train_loader, train_outputs = rec_model(mode="train") - save_var = train_outputs[1] - - if "gradient_clip" in config['Global']: - gradient_clip = config['Global']['gradient_clip'] - clip = fluid.clip.GradientClipByGlobalNorm(gradient_clip) - fluid.clip.set_gradient_clip(clip, program=train_prog) - - train_fetch_list = [v.name for v in train_outputs] - train_loss = train_outputs[0] - opt_params = config['Optimizer'] - optimizer = create_module(opt_params['function'])(opt_params) - optimizer.minimize(train_loss) - global_lr = optimizer._global_learning_rate() - global_lr.persistable = True - train_fetch_list.append(global_lr.name) - - train_reader = reader.train_eval_reader( - config=config, char_ops=char_ops, mode="train") - train_loader.set_sample_list_generator(train_reader, places=place) - - eval_prog = fluid.Program() - with fluid.program_guard(eval_prog, startup_prog): - with fluid.unique_name.guard(): - eval_loader, eval_outputs = rec_model(mode="eval") - eval_fetch_list = [v.name for v in eval_outputs] - - eval_prog = eval_prog.clone(for_test=True) - exe.run(startup_prog) - - eval_reader = reader.train_eval_reader( - config=config, char_ops=char_ops, mode="eval") - eval_loader.set_sample_list_generator(eval_reader, places=place) - - # compile program for multi-devices - train_compile_program = create_multi_devices_program(train_prog, - train_loss.name) - - pretrain_weights = config['Global']['pretrain_weights'] - if pretrain_weights is not None: - load_pretrain(exe, train_prog, pretrain_weights) - - train_batch_id = 0 - train_log_keys = ['loss', 'acc'] - log_smooth_window = config['Global']['log_smooth_window'] - epoch_num = config['Global']['epoch_num'] - loss_type = config['Global']['loss_type'] - print_step = config['Global']['print_step'] - eval_step = config['Global']['eval_step'] - save_epoch_step = config['Global']['save_epoch_step'] - save_dir = config['Global']['save_dir'] - train_stats = TrainingStats(log_smooth_window, train_log_keys) - best_eval_acc = -1 - best_batch_id = 0 - best_epoch = 0 - for epoch in range(epoch_num): - train_loader.start() - try: - while True: - t1 = time.time() - train_outs = exe.run(program=train_compile_program, - fetch_list=train_fetch_list, - return_numpy=False) - loss = np.mean(np.array(train_outs[0])) - lr = np.mean(np.array(train_outs[-1])) - - preds = np.array(train_outs[1]) - preds_lod = train_outs[1].lod()[0] - labels = np.array(train_outs[2]) - labels_lod = train_outs[2].lod()[0] - - acc, acc_num, img_num = cal_predicts_accuracy( - char_ops, preds, preds_lod, labels, labels_lod) - - t2 = time.time() - train_batch_elapse = t2 - t1 - - stats = {'loss': loss, 'acc': acc} - train_stats.update(stats) - if train_batch_id > 0 and train_batch_id % print_step == 0: - logs = train_stats.log() - strs = 'epoch: {}, iter: {}, lr: {:.6f}, {}, time: {:.3f}'.format( - epoch, train_batch_id, lr, logs, train_batch_elapse) - logger.info(strs) - - if train_batch_id > 0 and train_batch_id % eval_step == 0: - outs = eval_run(exe, eval_prog, eval_loader, - eval_fetch_list, char_ops, train_batch_id, - "eval") - eval_acc, acc_num, sample_num = outs - if eval_acc > best_eval_acc: - best_eval_acc = eval_acc - best_batch_id = train_batch_id - best_epoch = epoch - save_path = save_dir + "/best_accuracy" - save_model(train_prog, save_path) - - strs = 'Test iter: {}, acc:{:.6f}, best_acc:{:.6f}, best_epoch:{}, best_batch_id:{}, sample_num:{}'.format( - train_batch_id, eval_acc, best_eval_acc, best_epoch, - best_batch_id, sample_num) - logger.info(strs) - train_batch_id += 1 - - except fluid.core.EOFException: - train_loader.reset() - - if epoch > 0 and epoch % save_epoch_step == 0: - save_path = save_dir + "/iter_epoch_%d" % (epoch) - save_model(train_prog, save_path) - - -def test_reader(): - config = load_config(FLAGS.config) - merge_config(FLAGS.opt) - char_ops = CharacterOps(config['Global']) - config['Global']['char_num'] = char_ops.get_char_num() - print(config) - # tmp_reader = reader.train_eval_reader( - # config=cfg, char_ops=char_ops, mode="train") - tmp_reader = reader.train_eval_reader( - config=config, char_ops=char_ops, mode="eval") - count = 0 - print_count = 0 - import time - starttime = time.time() - for data in tmp_reader(): - count += len(data) - print_count += 1 - if print_count % 10 == 0: - batch_time = (time.time() - starttime) / print_count - print("reader:", count, len(data), batch_time) - print("finish reader:", count) - print("success") - - -if __name__ == '__main__': - parser = ArgsParser() - parser.add_argument( - "-r", - "--resume_checkpoint", - default=None, - type=str, - help="Checkpoint path for resuming training.") - FLAGS = parser.parse_args() - main() -# test_reader()