diff --git a/MANIFEST.in b/MANIFEST.in index cd34d574..cd1c9636 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,4 +1,4 @@ -include LICENSE.txt +include LICENSE include README.md recursive-include ppocr/utils *.txt utility.py logging.py network.py diff --git a/ppstructure/MANIFEST.in b/ppstructure/MANIFEST.in index f9bd0fe9..2961e722 100644 --- a/ppstructure/MANIFEST.in +++ b/ppstructure/MANIFEST.in @@ -1,9 +1,9 @@ -include LICENSE.txt +include LICENSE include README.md recursive-include ppocr/utils *.txt utility.py logging.py network.py recursive-include ppocr/data/ *.py recursive-include ppocr/postprocess *.py recursive-include tools/infer *.py -recursive-include table *.py -recursive-include ppstructure *.py \ No newline at end of file +recursive-include ppstructure *.py + diff --git a/ppstructure/README_ch.md b/ppstructure/README_ch.md index e69de29b..7ad154f8 100644 --- a/ppstructure/README_ch.md +++ b/ppstructure/README_ch.md @@ -0,0 +1 @@ +# TableStructurer \ No newline at end of file diff --git a/ppstructure/paddlestructure.py b/ppstructure/paddlestructure.py index 7c25f9c0..cf49fd99 100644 --- a/ppstructure/paddlestructure.py +++ b/ppstructure/paddlestructure.py @@ -16,15 +16,15 @@ import os import sys __dir__ = os.path.dirname(__file__) -sys.path.append(os.path.join(__dir__, '')) - +sys.path.append(__dir__) +sys.path.append(os.path.join(__dir__, '..')) import cv2 import numpy as np from pathlib import Path from ppocr.utils.logging import get_logger -from predict_system import OCRSystem, save_res +from ppstructure.predict_system import OCRSystem, save_res from utility import init_args logger = get_logger() @@ -93,9 +93,11 @@ class PaddleStructure(OCRSystem): params.rec_char_dict_path = str(Path(__file__).parent.parent / 'ppocr/utils/dict/table_dict.txt') if params.structure_char_dict_path is None: if os.path.exists(str(Path(__file__).parent / 'ppocr/utils/dict/table_structure_dict.txt')): - params.structure_char_dict_path = str(Path(__file__).parent / 'ppocr/utils/dict/table_structure_dict.txt') + params.structure_char_dict_path = str( + Path(__file__).parent / 'ppocr/utils/dict/table_structure_dict.txt') else: - params.structure_char_dict_path = str(Path(__file__).parent.parent / 'ppocr/utils/dict/table_structure_dict.txt') + params.structure_char_dict_path = str( + Path(__file__).parent.parent / 'ppocr/utils/dict/table_structure_dict.txt') print(params) super().__init__(params) @@ -146,3 +148,16 @@ def main(): logger.info(item['res']) save_res(result, save_folder, img_name) logger.info('result save to {}'.format(os.path.join(save_folder, img_name))) + + +if __name__ == '__main__': + table_engine = PaddleStructure( + det_model_dir='/Users/zhoujun20/Desktop/工作相关/table/table_pr/PaddleOCR/inference/table/ch_ppocr_mobile_v2.0_table_det_infer', + rec_model_dir='/Users/zhoujun20/Desktop/工作相关/table/table_pr/PaddleOCR/inference/table/ch_ppocr_mobile_v2.0_table_rec_infer', + structure_model_dir='/Users/zhoujun20/Desktop/工作相关/table/table_pr/PaddleOCR/inference/table/ch_ppocr_mobile_v2.0_table_structure_infer', + output='/Users/zhoujun20/Desktop/工作相关/table/table_pr/PaddleOCR/output/table', + show_log=True) + img = cv2.imread('/Users/zhoujun20/Desktop/工作相关/table/table_pr/PaddleOCR/ppstructure/test_imgs/table_1.png') + result = table_engine(img) + for line in result: + print(line) diff --git a/ppstructure/setup.py b/ppstructure/setup.py index 493599b7..0d7b2b9a 100644 --- a/ppstructure/setup.py +++ b/ppstructure/setup.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import os from setuptools import setup from io import open @@ -20,6 +21,7 @@ with open('../requirements.txt', encoding="utf-8-sig") as f: requirements = f.readlines() requirements.append('tqdm') requirements.append('layoutparser') + requirements.append('iopath') def readme(): @@ -27,9 +29,13 @@ def readme(): README = f.read() return README -shutil.copytree('../ppocr','./ppocr') -shutil.copytree('../tools','./tools') -shutil.copytree('../ppstructure','./ppstructure') + +shutil.copytree('../ppstructure/table', './ppstructure/table') +shutil.copyfile('../ppstructure/predict_system.py', './ppstructure/predict_system.py') +shutil.copyfile('../ppstructure/utility.py', './ppstructure/utility.py') +shutil.copytree('../ppocr', './ppocr') +shutil.copytree('../tools', './tools') +shutil.copyfile('../LICENSE', './LICENSE') setup( name='paddlestructure', @@ -62,4 +68,5 @@ setup( shutil.rmtree('ppocr') shutil.rmtree('tools') -shutil.rmtree('ppstructure') \ No newline at end of file +shutil.rmtree('ppstructure') +os.remove('LICENSE') diff --git a/ppstructure/table/README_ch.md b/ppstructure/table/README_ch.md index effd1cf2..10523106 100644 --- a/ppstructure/table/README_ch.md +++ b/ppstructure/table/README_ch.md @@ -8,7 +8,7 @@ python3 table/predict_table.py --det_model_dir=../inference/db --rec_model_dir=. ``` 运行完成后,每张图片的excel表格会保存到table_output字段指定的目录下 -eval +评估 ```python python3 table/eval_table.py --det_model_dir=../inference/db --rec_model_dir=../inference/rec_mv3_large1.0/infer --table_model_dir=../inference/explite3/infer --image_dir=../table/imgs --rec_char_dict_path=../ppocr/utils/dict/table_dict.txt --table_char_dict_path=../ppocr/utils/dict/table_structure_dict.txt --rec_char_type=EN --det_limit_side_len=736 --det_limit_type=min --gt_path=path/to/gt.json diff --git a/ppstructure/table/predict_table.py b/ppstructure/table/predict_table.py index 4a247e40..c4edd22c 100644 --- a/ppstructure/table/predict_table.py +++ b/ppstructure/table/predict_table.py @@ -28,11 +28,11 @@ import numpy as np import time import tools.infer.predict_rec as predict_rec import tools.infer.predict_det as predict_det -import ppstructure.table.predict_structure as predict_strture from ppocr.utils.utility import get_image_file_list, check_and_read_gif from ppocr.utils.logging import get_logger -from matcher import distance, compute_iou +from ppstructure.table.matcher import distance, compute_iou from ppstructure.utility import parse_args +import ppstructure.table.predict_structure as predict_strture logger = get_logger()