From e5a2062d9e6e6415d7c201718be4664315b65de5 Mon Sep 17 00:00:00 2001 From: tink2123 Date: Mon, 25 May 2020 17:10:04 +0800 Subject: [PATCH 1/2] Adaptive Windows --- ppocr/data/reader_main.py | 2 ++ ppocr/data/rec/dataset_traversal.py | 10 ++++++++-- ppocr/utils/character.py | 2 +- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/ppocr/data/reader_main.py b/ppocr/data/reader_main.py index 55bd1e08..b0df0d46 100755 --- a/ppocr/data/reader_main.py +++ b/ppocr/data/reader_main.py @@ -66,6 +66,8 @@ def reader_main(config=None, mode=None): reader_function = params['reader_function'] function = create_module(reader_function)(params) if mode == "train": + if sys.platform == "win32": + return function(0) readers = [] num_workers = params['num_workers'] for process_id in range(num_workers): diff --git a/ppocr/data/rec/dataset_traversal.py b/ppocr/data/rec/dataset_traversal.py index 357a89fb..f60b9fe3 100755 --- a/ppocr/data/rec/dataset_traversal.py +++ b/ppocr/data/rec/dataset_traversal.py @@ -13,6 +13,7 @@ #limitations under the License. import os +import sys import math import random import numpy as np @@ -191,16 +192,21 @@ class SimpleReader(object): img_num = len(label_infor_list) img_id_list = list(range(img_num)) random.shuffle(img_id_list) + if sys.platform=="win32": + print("multiprocess is not fully compatible with Windows." + "num_workers will be 1.") + self.num_workers = 1 for img_id in range(process_id, img_num, self.num_workers): label_infor = label_infor_list[img_id_list[img_id]] substr = label_infor.decode('utf-8').strip("\n").split("\t") img_path = self.img_set_dir + "/" + substr[0] img = cv2.imread(img_path) - if img.shape[-1]==1 or len(list(img.shape))==2: - img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) if img is None: logger.info("{} does not exist!".format(img_path)) continue + if img.shape[-1]==1 or len(list(img.shape))==2: + img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) + label = substr[1] outs = process_image(img, self.image_shape, label, self.char_ops, self.loss_type, diff --git a/ppocr/utils/character.py b/ppocr/utils/character.py index b4075039..3cbc31a4 100755 --- a/ppocr/utils/character.py +++ b/ppocr/utils/character.py @@ -34,7 +34,7 @@ class CharacterOps(object): with open(character_dict_path, "rb") as fin: lines = fin.readlines() for line in lines: - line = line.decode('utf-8').strip("\n") + line = line.decode('utf-8').strip("\n").strip("\r\n") self.character_str += line dict_character = list(self.character_str) elif self.character_type == "en_sensitive": From 53952a46c6092e6f9c8f3bca5997515218d2ac22 Mon Sep 17 00:00:00 2001 From: tink2123 Date: Mon, 25 May 2020 17:40:26 +0800 Subject: [PATCH 2/2] modified det reader --- ppocr/data/det/dataset_traversal.py | 5 +++++ 1 file changed, 5 insertions(+) mode change 100755 => 100644 ppocr/data/det/dataset_traversal.py diff --git a/ppocr/data/det/dataset_traversal.py b/ppocr/data/det/dataset_traversal.py old mode 100755 new mode 100644 index 3051c60d..272d7317 --- a/ppocr/data/det/dataset_traversal.py +++ b/ppocr/data/det/dataset_traversal.py @@ -13,6 +13,7 @@ #limitations under the License. import os +import sys import math import random import functools @@ -42,6 +43,10 @@ class TrainReader(object): img_num = len(label_infor_list) img_id_list = list(range(img_num)) random.shuffle(img_id_list) + if sys.platform == "win32": + print("multiprocess is not fully compatible with Windows." + "num_workers will be 1.") + self.num_workers = 1 for img_id in range(process_id, img_num, self.num_workers): label_infor = label_infor_list[img_id_list[img_id]] outs = self.process(label_infor)