Merge branch 'develop' of https://github.com/PaddlePaddle/PaddleOCR into master
|
@ -0,0 +1,35 @@
|
|||
# ex: set ts=8 noet:
|
||||
|
||||
all: qt5 test
|
||||
|
||||
test: testpy3
|
||||
|
||||
testpy2:
|
||||
python -m unittest discover tests
|
||||
|
||||
testpy3:
|
||||
python3 -m unittest discover tests
|
||||
|
||||
qt4: qt4py2
|
||||
|
||||
qt5: qt5py3
|
||||
|
||||
qt4py2:
|
||||
pyrcc4 -py2 -o libs/resources.py resources.qrc
|
||||
|
||||
qt4py3:
|
||||
pyrcc4 -py3 -o libs/resources.py resources.qrc
|
||||
|
||||
qt5py3:
|
||||
pyrcc5 -o libs/resources.py resources.qrc
|
||||
|
||||
clean:
|
||||
rm -rf ~/.labelImgSettings.pkl *.pyc dist labelImg.egg-info __pycache__ build
|
||||
|
||||
pip_upload:
|
||||
python3 setup.py upload
|
||||
|
||||
long_description:
|
||||
restview --long-description
|
||||
|
||||
.PHONY: all
|
|
@ -61,7 +61,7 @@ from libs.zoomWidget import ZoomWidget
|
|||
from libs.autoDialog import AutoDialog
|
||||
from libs.labelDialog import LabelDialog
|
||||
from libs.colorDialog import ColorDialog
|
||||
from libs.labelFile import LabelFile, LabelFileError, LabelFileFormat
|
||||
from libs.labelFile import LabelFile, LabelFileError
|
||||
from libs.toolBar import ToolBar
|
||||
from libs.ustr import ustr
|
||||
from libs.hashableQListWidgetItem import HashableQListWidgetItem
|
||||
|
@ -91,7 +91,7 @@ class WindowMixin(object):
|
|||
class MainWindow(QMainWindow, WindowMixin):
|
||||
FIT_WINDOW, FIT_WIDTH, MANUAL_ZOOM = list(range(3))
|
||||
|
||||
def __init__(self, defaultFilename=None, defaultPrefdefClassFile=None, defaultSaveDir=None, language="zh-CN"):
|
||||
def __init__(self, lang="ch", defaultFilename=None, defaultPrefdefClassFile=None, defaultSaveDir=None):
|
||||
super(MainWindow, self).__init__()
|
||||
self.setWindowTitle(__appname__)
|
||||
|
||||
|
@ -99,15 +99,15 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
self.settings = Settings()
|
||||
self.settings.load()
|
||||
settings = self.settings
|
||||
|
||||
self.lang = lang
|
||||
# Load string bundle for i18n
|
||||
if language not in ['zh-CN', 'en']:
|
||||
language = 'zh-CN'
|
||||
self.stringBundle = StringBundle.getBundle(localeStr=language) # 'en'
|
||||
if lang not in ['ch', 'en']:
|
||||
lang = 'en'
|
||||
self.stringBundle = StringBundle.getBundle(localeStr='zh-CN' if lang=='ch' else 'en') # 'en'
|
||||
getStr = lambda strId: self.stringBundle.getString(strId)
|
||||
|
||||
self.defaultSaveDir = defaultSaveDir
|
||||
self.ocr = PaddleOCR(use_pdserving=False, use_angle_cls=True, det=True, cls=True, use_gpu=True, lang="ch")
|
||||
self.ocr = PaddleOCR(use_pdserving=False, use_angle_cls=True, det=True, cls=True, use_gpu=False, lang=lang)
|
||||
|
||||
if os.path.exists('./data/paddle.png'):
|
||||
result = self.ocr.ocr('./data/paddle.png', cls=True, det=True)
|
||||
|
@ -162,7 +162,7 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
self.AutoRecognition.setToolButtonStyle(Qt.ToolButtonTextBesideIcon)
|
||||
self.AutoRecognition.setIcon(newIcon('Auto'))
|
||||
# self.AutoRecognition.setIconSize(QSize(100,20))
|
||||
self.AutoRecognition.setFixedSize(QSize(80,30))
|
||||
# self.AutoRecognition.setFixedSize(QSize(80,30))
|
||||
# self.AutoRecognition.setStyleSheet('text-align:center;')#border:none;font-size : 12pt;
|
||||
autoRecLayout = QHBoxLayout()
|
||||
autoRecLayout.setContentsMargins(0, 0, 0, 0)
|
||||
|
@ -189,18 +189,18 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
self.editButton = QToolButton()
|
||||
self.reRecogButton = QToolButton()
|
||||
self.reRecogButton.setIcon(newIcon('reRec', 30))
|
||||
self.reRecogButton.setFixedSize(QSize(80,30))
|
||||
# self.reRecogButton.setFixedSize(QSize(80,30))
|
||||
self.reRecogButton.setToolButtonStyle(Qt.ToolButtonTextBesideIcon)
|
||||
|
||||
self.newButton = QToolButton()
|
||||
self.newButton.setToolButtonStyle(Qt.ToolButtonTextBesideIcon)
|
||||
self.newButton.setFixedSize(QSize(80, 30))
|
||||
# self.newButton.setFixedSize(QSize(80, 30))
|
||||
self.SaveButton = QToolButton()
|
||||
self.SaveButton.setToolButtonStyle(Qt.ToolButtonTextBesideIcon)
|
||||
self.SaveButton.setFixedSize(QSize(60, 30))
|
||||
# self.SaveButton.setFixedSize(QSize(60, 30))
|
||||
self.DelButton = QToolButton()
|
||||
self.DelButton.setToolButtonStyle(Qt.ToolButtonTextBesideIcon)
|
||||
self.DelButton.setFixedSize(QSize(80, 30))
|
||||
# self.DelButton.setFixedSize(QSize(80, 30))
|
||||
|
||||
|
||||
lefttoptoolbox = QHBoxLayout()
|
||||
|
@ -423,10 +423,10 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
'Ctrl+D', 'copy', getStr('dupBoxDetail'),
|
||||
enabled=False)
|
||||
|
||||
hideAll = action('&Hide\nRectBox', partial(self.togglePolygons, False),
|
||||
hideAll = action(getStr('hideBox'), partial(self.togglePolygons, False),
|
||||
'Ctrl+H', 'hide', getStr('hideAllBoxDetail'),
|
||||
enabled=False)
|
||||
showAll = action('&Show\nRectBox', partial(self.togglePolygons, True),
|
||||
showAll = action(getStr('showBox'), partial(self.togglePolygons, True),
|
||||
'Ctrl+A', 'hide', getStr('showAllBoxDetail'),
|
||||
enabled=False)
|
||||
|
||||
|
@ -593,7 +593,7 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
zoomIn, zoomOut, zoomOrg, None,
|
||||
fitWindow, fitWidth))
|
||||
|
||||
addActions(self.menus.autolabel, (saveRec, None, help)) # alcm,
|
||||
addActions(self.menus.autolabel, (alcm, saveRec, None, help)) #
|
||||
|
||||
self.menus.file.aboutToShow.connect(self.updateFileMenu)
|
||||
|
||||
|
@ -787,7 +787,7 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
QMessageBox.information(self, u'Information', msg)
|
||||
|
||||
def showStepsDialog(self):
|
||||
msg = steps()
|
||||
msg = stepsInfo(self.lang)
|
||||
QMessageBox.information(self, u'Information', msg)
|
||||
|
||||
def createShape(self):
|
||||
|
@ -1917,7 +1917,50 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
|
||||
|
||||
def autolcm(self):
    """Show a modal dialog that lets the user pick the OCR model language.

    The dialog holds a caption label, a combo box with the selectable
    languages, and OK / Cancel buttons.  OK is wired to ``modelChoose`` and
    Cancel to ``cancel``.  The call blocks in ``exec_()`` until the dialog
    is closed; afterwards the auto-recognition button is enabled when a
    file is currently loaded (``self.filePath`` is truthy).
    """
    print('autolabelchoosemodel')
    getString = self.stringBundle.getString

    # Caption above the language selector.
    self.panel = QLabel()
    self.panel.setText(getString('choseModelLg'))
    self.panel.setAlignment(Qt.AlignLeft)

    # Display names offered here must match the keys looked up in modelChoose.
    self.comboBox = QComboBox()
    self.comboBox.setObjectName("comboBox")
    self.comboBox.addItems(['Chinese & English', 'English', 'French', 'German', 'Korean', 'Japanese'])

    self.dialog = QDialog()
    self.dialog.resize(300, 100)
    self.dialog.setWindowTitle(getString('choseModelLg'))

    self.okBtn = QPushButton(getString('ok'))
    self.cancelBtn = QPushButton(getString('cancel'))
    self.okBtn.clicked.connect(self.modelChoose)
    self.cancelBtn.clicked.connect(self.cancel)

    hbox = QHBoxLayout()
    hbox.addWidget(self.okBtn)
    hbox.addWidget(self.cancelBtn)

    # The label is added to the layout exactly once.  The previous code
    # added it a second time just before the button row, which left two
    # layout items referring to the same widget.
    vbox = QVBoxLayout()
    vbox.addWidget(self.panel)
    vbox.addWidget(self.comboBox)
    vbox.addLayout(hbox)

    self.dialog.setLayout(vbox)
    self.dialog.setWindowModality(Qt.ApplicationModal)
    self.dialog.exec_()

    if self.filePath:
        self.AutoRecognition.setEnabled(True)
|
||||
|
||||
|
||||
def modelChoose(self):
    """Replace the OCR engine with one for the language chosen in the dialog.

    Reads the current combo-box text, maps it to the language code passed
    to ``PaddleOCR``, rebuilds ``self.ocr`` and closes the dialog.
    """
    print(self.comboBox.currentText())

    # Combo-box display name -> language code handed to PaddleOCR.
    language_codes = {
        'Chinese & English': 'ch',
        'English': 'en',
        'French': 'french',
        'German': 'german',
        'Korean': 'korean',
        'Japanese': 'japan',
    }

    # Drop the previous engine before constructing the new one.
    del self.ocr
    chosen = self.comboBox.currentText()
    self.ocr = PaddleOCR(
        use_pdserving=False,
        use_angle_cls=True,
        det=True,
        cls=True,
        use_gpu=False,
        lang=language_codes[chosen],
    )
    self.dialog.close()
|
||||
|
||||
def cancel(self):
    """Close the model-language dialog without changing the OCR engine."""
    dialog = self.dialog
    dialog.close()
|
||||
|
||||
def loadFilestate(self, saveDir):
|
||||
self.fileStatepath = saveDir + '/fileState.txt'
|
||||
|
@ -2020,18 +2063,15 @@ def get_main_app(argv=[]):
|
|||
app.setWindowIcon(newIcon("app"))
|
||||
# Tzutalin 201705+: Accept extra arguments to change predefined class file
|
||||
argparser = argparse.ArgumentParser()
|
||||
argparser.add_argument("image_dir", nargs="?")
|
||||
argparser.add_argument("language", default='zh-CN',nargs="?")
|
||||
argparser.add_argument("predefined_classes_file",
|
||||
argparser.add_argument("--lang", default='ch', nargs="?")
|
||||
argparser.add_argument("--predefined_classes_file",
|
||||
default=os.path.join(os.path.dirname(__file__), "data", "predefined_classes.txt"),
|
||||
nargs="?")
|
||||
argparser.add_argument("save_dir", nargs="?")
|
||||
args = argparser.parse_args(argv[1:])
|
||||
# Usage : labelImg.py image predefClassFile saveDir
|
||||
win = MainWindow(args.image_dir,
|
||||
args.predefined_classes_file,
|
||||
args.save_dir,
|
||||
args.language)
|
||||
win = MainWindow(lang=args.lang,
|
||||
defaultPrefdefClassFile=args.predefined_classes_file,
|
||||
)
|
||||
win.show()
|
||||
return app, win
|
||||
|
||||
|
|
|
@ -24,11 +24,9 @@ python PPOCRLabel.py
|
|||
#### Ubuntu Linux
|
||||
|
||||
```
|
||||
sudo apt-get install pyqt5-dev-tools
|
||||
sudo apt-get install trash-cli
|
||||
pip3 install pyqt5
|
||||
pip3 install trash-cli
|
||||
cd ./PPOCRLabel # 将目录切换到PPOCRLabel文件夹下
|
||||
sudo pip3 install -r requirements/requirements-linux-python3.txt
|
||||
make qt5py3
|
||||
python3 PPOCRLabel.py
|
||||
```
|
||||
|
||||
|
@ -38,7 +36,6 @@ pip3 install pyqt5
|
|||
pip3 uninstall opencv-python # 由于mac版本的opencv与pyqt有冲突,需先手动卸载opencv
|
||||
pip3 install opencv-contrib-python-headless # 安装headless版本的open-cv
|
||||
cd ./PPOCRLabel # 将目录切换到PPOCRLabel文件夹下
|
||||
make qt5py3
|
||||
python3 PPOCRLabel.py
|
||||
```
|
||||
|
||||
|
@ -75,6 +72,20 @@ python3 PPOCRLabel.py
|
|||
| rec_gt.txt | 识别标签。可直接用于PPOCR识别模型训练。需用户手动点击菜单栏“PaddleOCR” - "保存识别结果"后产生。 |
|
||||
| crop_img | 识别数据。按照检测框切割后的图片。与rec_gt.txt同时产生。 |
|
||||
|
||||
## 说明
|
||||
### 内置模型
|
||||
- 默认模型:PPOCRLabel默认使用PaddleOCR中的中英文超轻量OCR模型,支持中英文与数字识别,多种语言检测。
|
||||
- 模型语言切换:用户可通过菜单栏中 "PaddleOCR" - "选择模型" 切换内置模型语言,目前支持的语言包括法文、德文、韩文、日文。具体模型下载链接可参考[PaddleOCR模型列表](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_ch/models_list.md).
|
||||
- 自定义模型:用户可根据[自定义模型代码使用](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_ch/whl.md#%E8%87%AA%E5%AE%9A%E4%B9%89%E6%A8%A1%E5%9E%8B),通过修改PPOCRLabel.py中针对[PaddleOCR类的实例化](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/PPOCRLabel/PPOCRLabel.py#L110)替换成自己训练的模型
|
||||
|
||||
### 错误提示
|
||||
- 如果同时使用whl包安装了paddleocr,其优先级大于通过paddleocr.py调用PaddleOCR类,whl包未更新时会导致程序异常。
|
||||
- PPOCRLabel**不支持对中文文件名**的图片进行自动标注。
|
||||
- 如果您在打开软件过程中出现**objc[XXXXX]**开头的错误,证明您的opencv版本太高,建议安装4.2版本:
|
||||
```
|
||||
pip install opencv-python==4.2.0.32
|
||||
```
|
||||
|
||||
### 参考资料
|
||||
|
||||
1.[Tzutalin. LabelImg. Git code (2015)](https://github.com/tzutalin/labelImg)
|
||||
|
|
|
@ -0,0 +1,102 @@
|
|||
# PPOCRLabel
|
||||
|
||||
PPOCRLabel is a semi-automatic graphic annotation tool suitable for OCR field. It is written in python3 and pyqt5. Support rectangular frame labeling and four-point labeling mode. Annotations can be directly used for the training of PPOCR detection and recognition models.
|
||||
|
||||
<img src="./data/gif/steps.gif" width="100%"/>
|
||||
|
||||
## Installation
|
||||
|
||||
### 1. Install PaddleOCR
|
||||
|
||||
Refer to the [PaddleOCR installation document](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_en/installation_en.md) to prepare PaddleOCR.
|
||||
|
||||
### 2. Install PPOCRLabel
|
||||
|
||||
#### Windows + Anaconda
|
||||
|
||||
Download and install [Anaconda](https://www.anaconda.com/download/#download) (Python 3+)
|
||||
|
||||
```
|
||||
conda install pyqt=5
|
||||
cd ./PPOCRLabel # Change the directory to the PPOCRLabel folder
|
||||
pyrcc5 -o libs/resources.py resources.qrc
|
||||
python PPOCRLabel.py --lang en
|
||||
```
|
||||
|
||||
#### Ubuntu Linux
|
||||
|
||||
```
|
||||
pip3 install pyqt5
|
||||
pip3 install trash-cli
|
||||
cd ./PPOCRLabel # Change the directory to the PPOCRLabel folder
|
||||
python3 PPOCRLabel.py --lang en
|
||||
```
|
||||
|
||||
#### macOS
|
||||
```
|
||||
pip3 install pyqt5
|
||||
pip3 uninstall opencv-python # Uninstall opencv manually as it conflicts with pyqt
|
||||
pip3 install opencv-contrib-python-headless # Install the headless version of opencv
|
||||
cd ./PPOCRLabel # Change the directory to the PPOCRLabel folder
|
||||
python3 PPOCRLabel.py --lang en
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
### Steps
|
||||
|
||||
1. Build and launch using the instructions above.
|
||||
|
||||
2. Click 'Open Dir' in Menu/File to select the folder of the picture.<sup>[1]</sup>
|
||||
|
||||
3. Click 'Auto recognition' to use the PPOCR model to automatically annotate images which are marked with 'X' <sup>[2]</sup> before the file name.
|
||||
|
||||
4. Create Box:
|
||||
|
||||
4.1 Click 'Create RectBox' or press 'W' in English keyboard mode to draw a new rectangle detection box. Click and release left mouse to select a region to annotate the text area.
|
||||
|
||||
4.2 Press 'P' to enter four-point labeling mode which enables you to create any four-point shape by clicking four points with the left mouse button in succession and DOUBLE CLICK the left mouse as the signal of labeling completion.
|
||||
|
||||
5. After the marking frame is drawn, the user clicks "OK", and the detection frame will be pre-assigned a "TEMPORARY" label.
|
||||
|
||||
6. Click 're-Recognition', model will rewrite ALL recognition results in ALL detection box<sup>[3]</sup>.
|
||||
|
||||
7. Double click the result in 'recognition result' list to manually change inaccurate recognition results.
|
||||
|
||||
8. Click "Save", the image status will switch to "√", then the program automatically jumps to the next image.
|
||||
|
||||
9. Click "Delete Image" and the image will be deleted to the recycle bin.
|
||||
|
||||
10. Labeling result: After closing the application or switching the file path, the manually saved label will be stored in *Label.txt* under the opened picture folder.
|
||||
Click "PaddleOCR"-"Save Recognition Results" in the menu bar, the recognition training data of such pictures will be saved in the *crop_img* folder, and the recognition label will be saved in *rec_gt.txt*<sup>[4]</sup>.
|
||||
|
||||
### Note
|
||||
|
||||
[1] PPOCRLabel uses the opened folder as the project. After opening the image folder, the picture will not be displayed in the dialog. Instead, the pictures under the folder will be directly imported into the program after clicking "Open Dir".
|
||||
|
||||
[2] The image status indicates whether the user has saved the image manually. If it has not been saved manually it is "X", otherwise it is "√", PPOCRLabel will not relabel pictures with a status of "√".
|
||||
|
||||
[3] After clicking "Re-recognize", the model will overwrite ALL recognition results in the picture.
|
||||
Therefore, if the recognition result has been manually changed before, it may change after re-recognition.
|
||||
|
||||
[4] The files produced by PPOCRLabel include the following, please do not manually change the contents, otherwise it will cause the program to be abnormal.
|
||||
|
||||
| File name | Description |
|
||||
| :-----------: | :----------------------------------------------------------: |
|
||||
| Label.txt | The detection label file can be directly used for PPOCR detection model training. After the user saves 10 label results, the file will be automatically saved. It will also be written when the user closes the application or changes the file folder. |
|
||||
| fileState.txt | The picture status file saves the images in the current folder that have been manually confirmed by the user. |
|
||||
| Cache.cach | Cache files to save the results of model recognition. |
|
||||
| rec_gt.txt | The recognition label file, which can be directly used for PPOCR identification model training, is generated after the user clicks on the menu bar "PaddleOCR"-"Save recognition result". |
|
||||
| crop_img | The recognition data, generated at the same time with *rec_gt.txt* |
|
||||
|
||||
|
||||
### Built-in Model
|
||||
- Default model: PPOCRLabel uses the Chinese and English ultra-lightweight OCR model in PaddleOCR by default, supports Chinese, English and number recognition, and multiple language detection.
|
||||
- Model language switching: The built-in model language can be changed by clicking "PaddleOCR"-"Choose OCR Model" in the menu bar. Currently supported languages include French, German, Korean, and Japanese.
|
||||
For specific model download links, please refer to [PaddleOCR Model List](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_en/models_list_en.md#multilingual-recognition-modelupdating)
|
||||
- Custom model: The model trained by users can be replaced by modifying PPOCRLabel.py in [PaddleOCR class instantiation](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/PPOCRLabel/PPOCRLabel.py#L110) referring [Custom Model Code](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_en/whl_en.md#use-custom-model)
|
||||
|
||||
|
||||
## Related
|
||||
|
||||
1.[Tzutalin. LabelImg. Git code (2015)](https://github.com/tzutalin/labelImg)
|
|
@ -1,12 +0,0 @@
|
|||
*.spec
|
||||
build
|
||||
dist
|
||||
pyinstaller
|
||||
python-2.*
|
||||
pywin32*
|
||||
virtual-wine
|
||||
venv_wine
|
||||
PyQt4-*
|
||||
lxml-*
|
||||
windows_v*
|
||||
linux_v*
|
|
@ -1,35 +0,0 @@
|
|||
### Deploy to PyPI
|
||||
|
||||
```
|
||||
cd [ROOT]
|
||||
sh build-tools/build-for-pypi.sh
|
||||
```
|
||||
|
||||
### Build for Ubuntu
|
||||
|
||||
```
|
||||
cd build-tools
|
||||
sh run-in-container.sh
|
||||
sh envsetup.sh
|
||||
sh build-ubuntu-binary.sh
|
||||
```
|
||||
|
||||
### Build for Windows
|
||||
|
||||
```
|
||||
cd build-tools
|
||||
sh run-in-container.sh
|
||||
sh envsetup.sh
|
||||
sh build-windows-binary.sh
|
||||
```
|
||||
|
||||
### Build for macOS High Sierra
|
||||
```
|
||||
cd build-tools
|
||||
./build-for-macos.sh
|
||||
```
|
||||
|
||||
Note: If there are some problems, try to
|
||||
```
|
||||
sudo rm -rf virtual-wine venv_wine
|
||||
```
|
|
@ -1,30 +0,0 @@
|
|||
#!/bin/sh
|
||||
|
||||
brew install python@2
|
||||
pip install --upgrade virtualenv
|
||||
|
||||
# clone labelimg source
|
||||
rm -rf /tmp/labelImgSetup
|
||||
mkdir /tmp/labelImgSetup
|
||||
cd /tmp/labelImgSetup
|
||||
curl https://codeload.github.com/tzutalin/labelImg/zip/master --output labelImg.zip
|
||||
unzip labelImg.zip
|
||||
rm labelImg.zip
|
||||
|
||||
# setup python3 space
|
||||
virtualenv --system-site-packages -p python3 /tmp/labelImgSetup/labelImg-py3
|
||||
source /tmp/labelImgSetup/labelImg-py3/bin/activate
|
||||
cd labelImg-master
|
||||
|
||||
# build labelImg app
|
||||
pip install py2app
|
||||
pip install PyQt5 lxml
|
||||
make qt5py3
|
||||
rm -rf build dist
|
||||
python setup.py py2app -A
|
||||
mv "/tmp/labelImgSetup/labelImg-master/dist/labelImg.app" /Applications
|
||||
# deactivate python3
|
||||
deactivate
|
||||
cd ../
|
||||
rm -rf /tmp/labelImgSetup
|
||||
echo 'DONE'
|
|
@ -1,17 +0,0 @@
|
|||
#!/bin/sh
|
||||
# Packaging and Release
|
||||
docker run --workdir=$(pwd)/ --volume="/home/$USER:/home/$USER" tzutalin/py2qt4 /bin/sh -c 'make qt4py2; make test;sudo python setup.py sdist;sudo python setup.py install'
|
||||
|
||||
while true; do
|
||||
read -p "Do you wish to deploy this to PyPI(twine upload dist/* or pip install dist/*)?" yn
|
||||
case $yn in
|
||||
[Yy]* ) docker run -it --rm --workdir=$(pwd)/ --volume="/home/$USER:/home/$USER" tzutalin/py2qt4; break;;
|
||||
[Nn]* ) exit;;
|
||||
* ) echo "Please answer yes or no.";;
|
||||
esac
|
||||
done
|
||||
# python setup.py register
|
||||
# python setup.py sdist upload
|
||||
# Net pypi: twine upload dist/*
|
||||
|
||||
# Test before uploading: pip install dist/labelImg.tar.gz
|
|
@ -1,24 +0,0 @@
|
|||
#!/bin/bash
|
||||
### Ubuntu use pyinstall v3.0
|
||||
THIS_SCRIPT_PATH=`readlink -f $0`
|
||||
THIS_SCRIPT_DIR=`dirname ${THIS_SCRIPT_PATH}`
|
||||
cd pyinstaller
|
||||
git checkout v3.2
|
||||
cd ${THIS_SCRIPT_DIR}
|
||||
|
||||
rm -r build
|
||||
rm -r dist
|
||||
rm labelImg.spec
|
||||
python pyinstaller/pyinstaller.py --hidden-import=xml \
|
||||
--hidden-import=xml.etree \
|
||||
--hidden-import=xml.etree.ElementTree \
|
||||
--hidden-import=lxml.etree \
|
||||
-D -F -n labelImg -c "../labelImg.py" -p ../libs -p ../
|
||||
|
||||
FOLDER=$(git describe --abbrev=0 --tags)
|
||||
FOLDER="linux_"$FOLDER
|
||||
rm -rf "$FOLDER"
|
||||
mkdir "$FOLDER"
|
||||
cp dist/labelImg $FOLDER
|
||||
cp -rf ../data $FOLDER/data
|
||||
zip "$FOLDER.zip" -r $FOLDER
|
|
@ -1,32 +0,0 @@
|
|||
#!/bin/bash
|
||||
### Window requires pyinstall v2.1
|
||||
wine msiexec -i python-2.7.8.msi
|
||||
wine pywin32-218.win32-py2.7.exe
|
||||
wine PyQt4-4.11.4-gpl-Py2.7-Qt4.8.7-x32.exe
|
||||
wine lxml-3.7.3.win32-py2.7.exe
|
||||
|
||||
THIS_SCRIPT_PATH=`readlink -f $0`
|
||||
THIS_SCRIPT_DIR=`dirname ${THIS_SCRIPT_PATH}`
|
||||
cd pyinstaller
|
||||
git checkout v2.1
|
||||
cd ${THIS_SCRIPT_DIR}
|
||||
echo ${THIS_SCRIPT_DIR}
|
||||
|
||||
#. venv_wine/bin/activate
|
||||
rm -r build
|
||||
rm -r dist
|
||||
rm labelImg.spec
|
||||
|
||||
wine c:/Python27/python.exe pyinstaller/pyinstaller.py --hidden-import=xml \
|
||||
--hidden-import=xml.etree \
|
||||
--hidden-import=xml.etree.ElementTree \
|
||||
--hidden-import=lxml.etree \
|
||||
-D -F -n labelImg -c "../labelImg.py" -p ../libs -p ../
|
||||
|
||||
FOLDER=$(git describe --abbrev=0 --tags)
|
||||
FOLDER="windows_"$FOLDER
|
||||
rm -rf "$FOLDER"
|
||||
mkdir "$FOLDER"
|
||||
cp dist/labelImg.exe $FOLDER
|
||||
cp -rf ../data $FOLDER/data
|
||||
zip "$FOLDER.zip" -r $FOLDER
|
|
@ -1,53 +0,0 @@
|
|||
#!/bin/sh
|
||||
|
||||
THIS_SCRIPT_PATH=`readlink -f $0`
|
||||
THIS_SCRIPT_DIR=`dirname ${THIS_SCRIPT_PATH}`
|
||||
#OS Ubuntu 14.04
|
||||
### Common packages for linux/windows
|
||||
if [ ! -e "pyinstaller" ]; then
|
||||
git clone https://github.com/pyinstaller/pyinstaller
|
||||
cd pyinstaller
|
||||
git checkout v2.1 -b v2.1
|
||||
cd ${THIS_SCRIPT_DIR}
|
||||
fi
|
||||
|
||||
echo "Going to clone and download packages for building windows"
|
||||
#Packages
|
||||
#> pyinstaller (2.1)
|
||||
#> wine (1.6.2)
|
||||
#> virtual-wine (0.1)
|
||||
#> python-2.7.8.msi
|
||||
#> pywin32-218.win32-py2.7.exe
|
||||
|
||||
## tool to install on Ubuntu
|
||||
#$ sudo apt-get install wine
|
||||
|
||||
### Clone a repo to create virtual wine env
|
||||
if [ ! -e "virtual-wine" ]; then
|
||||
git clone https://github.com/htgoebel/virtual-wine.git
|
||||
fi
|
||||
|
||||
apt-get install scons
|
||||
### Create virtual env
|
||||
rm -rf venv_wine
|
||||
./virtual-wine/vwine-setup venv_wine
|
||||
#### Activate virtual env
|
||||
. venv_wine/bin/activate
|
||||
|
||||
### Use wine to install packages to virtual env
|
||||
if [ ! -e "python-2.7.8.msi" ]; then
|
||||
wget "https://www.python.org/ftp/python/2.7.8/python-2.7.8.msi"
|
||||
fi
|
||||
|
||||
if [ ! -e "pywin32-218.win32-py2.7.exe" ]; then
|
||||
wget "http://nchc.dl.sourceforge.net/project/pywin32/pywin32/Build%20218/pywin32-218.win32-py2.7.exe"
|
||||
fi
|
||||
|
||||
if [ ! -e "PyQt4-4.11.4-gpl-Py2.7-Qt4.8.7-x32.exe" ]; then
|
||||
wget "http://nchc.dl.sourceforge.net/project/pyqt/PyQt4/PyQt-4.11.4/PyQt4-4.11.4-gpl-Py2.7-Qt4.8.7-x32.exe"
|
||||
fi
|
||||
|
||||
if [ ! -e "lxml-3.7.3.win32-py2.7.exe" ]; then
|
||||
wget "https://pypi.python.org/packages/a3/f6/a28c5cf63873f6c55a3eb7857b736379229b85ba918261d2e88cf886905e/lxml-3.7.3.win32-py2.7.exe#md5=a0f746355876aca4ca5371cb0f1d13ce"
|
||||
fi
|
||||
|
|
@ -1,13 +0,0 @@
|
|||
#!/bin/sh
|
||||
docker run -it \
|
||||
--user $(id -u) \
|
||||
-e DISPLAY=unix$DISPLAY \
|
||||
--workdir=$(pwd) \
|
||||
--volume="/home/$USER:/home/$USER" \
|
||||
--volume="/etc/group:/etc/group:ro" \
|
||||
--volume="/etc/passwd:/etc/passwd:ro" \
|
||||
--volume="/etc/shadow:/etc/shadow:ro" \
|
||||
--volume="/etc/sudoers.d:/etc/sudoers.d:ro" \
|
||||
-v /tmp/.X11-unix:/tmp/.X11-unix \
|
||||
tzutalin/py2qt4
|
||||
|
|
@ -150,18 +150,33 @@ def get_rotate_crop_image(img, points):
|
|||
except Exception as e:
|
||||
print(e)
|
||||
|
||||
def steps():
|
||||
msg = "1. 安装与运行:使用上述命令安装与运行程序。\n" \
|
||||
"2. 打开文件夹:在菜单栏点击 “文件” - 打开目录 选择待标记图片的文件夹.\n"\
|
||||
"3. 自动标注:点击 ”自动标注“,使用PPOCR超轻量模型对图片文件名前图片状态为 “X” 的图片进行自动标注。\n" \
|
||||
"4. 手动标注:点击 “矩形标注”(推荐直接在英文模式下点击键盘中的 “W”),用户可对当前图片中模型未检出的部分进行手动" \
|
||||
"绘制标记框。点击键盘P,则使用四点标注模式(或点击“编辑” - “四点标注”),用户依次点击4个点后,双击左键表示标注完成。\n" \
|
||||
"5. 标记框绘制完成后,用户点击 “确认”,检测框会先被预分配一个 “待识别” 标签。\n" \
|
||||
"6. 重新识别:将图片中的所有检测画绘制/调整完成后,点击 “重新识别”,PPOCR模型会对当前图片中的**所有检测框**重新识别。\n" \
|
||||
"7. 内容更改:双击识别结果,对不准确的识别结果进行手动更改。\n" \
|
||||
"8. 保存:点击 “保存”,图片状态切换为 “√”,跳转至下一张。\n" \
|
||||
"9. 删除:点击 “删除图像”,图片将会被删除至回收站。\n" \
|
||||
"10. 标注结果:关闭应用程序或切换文件路径后,手动保存过的标签将会被存放在所打开图片文件夹下的" \
|
||||
"*Label.txt*中。在菜单栏点击 “PaddleOCR” - 保存识别结果后,会将此类图片的识别训练数据保存在*crop_img*文件夹下," \
|
||||
"识别标签保存在*rec_gt.txt*中。\n"
|
||||
def stepsInfo(lang='en'):
    """Return the usage-steps help text shown in the "Steps" dialog.

    Args:
        lang: ``'ch'`` selects the Chinese text; any other value selects
            the English text.

    Returns:
        A multi-line string with one numbered step per line.
    """
    if lang == 'ch':
        return (
            "1. 安装与运行:使用上述命令安装与运行程序。\n"
            "2. 打开文件夹:在菜单栏点击 “文件” - 打开目录 选择待标记图片的文件夹.\n"
            "3. 自动标注:点击 ”自动标注“,使用PPOCR超轻量模型对图片文件名前图片状态为 “X” 的图片进行自动标注。\n"
            "4. 手动标注:点击 “矩形标注”(推荐直接在英文模式下点击键盘中的 “W”),用户可对当前图片中模型未检出的部分进行手动"
            "绘制标记框。点击键盘P,则使用四点标注模式(或点击“编辑” - “四点标注”),用户依次点击4个点后,双击左键表示标注完成。\n"
            "5. 标记框绘制完成后,用户点击 “确认”,检测框会先被预分配一个 “待识别” 标签。\n"
            "6. 重新识别:将图片中的所有检测画绘制/调整完成后,点击 “重新识别”,PPOCR模型会对当前图片中的**所有检测框**重新识别。\n"
            "7. 内容更改:双击识别结果,对不准确的识别结果进行手动更改。\n"
            "8. 保存:点击 “保存”,图片状态切换为 “√”,跳转至下一张。\n"
            "9. 删除:点击 “删除图像”,图片将会被删除至回收站。\n"
            "10. 标注结果:关闭应用程序或切换文件路径后,手动保存过的标签将会被存放在所打开图片文件夹下的"
            "*Label.txt*中。在菜单栏点击 “PaddleOCR” - 保存识别结果后,会将此类图片的识别训练数据保存在*crop_img*文件夹下,"
            "识别标签保存在*rec_gt.txt*中。\n"
        )

    return (
        "1. Build and launch using the instructions above.\n"
        "2. Click 'Open Dir' in Menu/File to select the folder of the picture.\n"
        # Step 3 previously lacked its trailing newline, so step 4 was
        # rendered on the same line as step 3.
        "3. Click 'Auto recognition', use PPOCR model to automatically annotate images which marked with 'X' before the file name.\n"
        "4. Create Box:\n"
        "4.1 Click 'Create RectBox' or press 'W' in English keyboard mode to draw a new rectangle detection box. Click and release left mouse to select a region to annotate the text area.\n"
        "4.2 Press 'P' to enter four-point labeling mode which enables you to create any four-point shape by clicking four points with the left mouse button in succession and DOUBLE CLICK the left mouse as the signal of labeling completion.\n"
        "5. After the marking frame is drawn, the user clicks 'OK', and the detection frame will be pre-assigned a TEMPORARY label.\n"
        "6. Click re-Recognition, model will rewrite ALL recognition results in ALL detection box.\n"
        "7. Double click the result in 'recognition result' list to manually change inaccurate recognition results.\n"
        "8. Click 'Save', the image status will switch to '√',then the program automatically jump to the next.\n"
        "9. Click 'Delete Image' and the image will be deleted to the recycle bin.\n"
        "10. Labeling result: After closing the application or switching the file path, the manually saved label will be stored in *Label.txt* under the opened picture folder.\n"
        "    Click PaddleOCR-Save Recognition Results in the menu bar, the recognition training data of such pictures will be saved in the *crop_img* folder, and the recognition label will be saved in *rec_gt.txt*.\n"
    )
|
|
@ -87,4 +87,10 @@ creatPolygon=四点标注
|
|||
drawSquares=正方形标注
|
||||
saveRec=保存识别结果
|
||||
tempLabel=待识别
|
||||
steps=操作步骤
|
||||
steps=操作步骤
|
||||
choseModelLg=选择模型语言
|
||||
cancel=取消
|
||||
ok=确认
|
||||
autolabeling=自动标注中
|
||||
hideBox=隐藏所有标注
|
||||
showBox=显示所有标注
|
|
@ -76,10 +76,10 @@ ImageResize=Image Resize
|
|||
IR=Image Resize
|
||||
autoRecognition=Auto Recognition
|
||||
reRecognition=Re-recognition
|
||||
mfile=file
|
||||
medit=eidt
|
||||
mview=view
|
||||
mhelp=help
|
||||
mfile=File
|
||||
medit=Edit
|
||||
mview=View
|
||||
mhelp=Help
|
||||
iconList=Icon List
|
||||
detectionBoxposition=Detection box position
|
||||
recognitionResult=Recognition result
|
||||
|
@ -87,4 +87,10 @@ creatPolygon=Create Quadrilateral
|
|||
drawSquares=Draw Squares
|
||||
saveRec=Save Recognition Result
|
||||
tempLabel=TEMPORARY
|
||||
setps=Steps
|
||||
steps=Steps
|
||||
choseModelLg=Choose Model Language
|
||||
cancel=Cancel
|
||||
ok=OK
|
||||
autolabeling=Automatic Labeling
|
||||
hideBox=Hide All Box
|
||||
showBox=Show All Box
|
17
README.md
|
@ -4,6 +4,7 @@ English | [简体中文](README_ch.md)
|
|||
PaddleOCR aims to create multilingual, awesome, leading, and practical OCR tools that help users train better models and apply them into practice.
|
||||
|
||||
**Recent updates**
|
||||
- 2020.11.25 Update a new data annotation tool, i.e., [PPOCRLabel](./PPOCRLabel/README_en.md), which is helpful to improve the labeling efficiency. Moreover, the labeling results can be used in training of the PP-OCR system directly.
|
||||
- 2020.9.22 Update the PP-OCR technical article, https://arxiv.org/abs/2009.09941
|
||||
- 2020.9.19 Update the ultra lightweight compressed ppocr_mobile_slim series models, the overall model size is 3.5M (see [PP-OCR Pipeline](#PP-OCR-Pipeline)), suitable for mobile deployment. [Model Downloads](#Supported-Chinese-model-list)
|
||||
- 2020.9.17 Update the ultra lightweight ppocr_mobile series and general ppocr_server series Chinese and English ocr models, which are comparable to commercial effects. [Model Downloads](#Supported-Chinese-model-list)
|
||||
|
@ -77,30 +78,32 @@ For a new language request, please refer to [Guideline for new language_requests
|
|||
- [Installation](./doc/doc_en/installation_en.md)
|
||||
- [Quick Start](./doc/doc_en/quickstart_en.md)
|
||||
- [Code Structure](./doc/doc_en/tree_en.md)
|
||||
- Algorithm introduction
|
||||
- Algorithm Introduction
|
||||
- [Text Detection Algorithm](./doc/doc_en/algorithm_overview_en.md)
|
||||
- [Text Recognition Algorithm](./doc/doc_en/algorithm_overview_en.md)
|
||||
- [PP-OCR Pipeline](#PP-OCR-Pipeline)
|
||||
- Model training/evaluation
|
||||
- Model Training/Evaluation
|
||||
- [Text Detection](./doc/doc_en/detection_en.md)
|
||||
- [Text Recognition](./doc/doc_en/recognition_en.md)
|
||||
- [Direction Classification](./doc/doc_en/angle_class_en.md)
|
||||
- [Yml Configuration](./doc/doc_en/config_en.md)
|
||||
- Inference and Deployment
|
||||
- [Quick inference based on pip](./doc/doc_en/whl_en.md)
|
||||
- [Quick Inference Based on PIP](./doc/doc_en/whl_en.md)
|
||||
- [Python Inference](./doc/doc_en/inference_en.md)
|
||||
- [C++ Inference](./deploy/cpp_infer/readme_en.md)
|
||||
- [Serving](./deploy/hubserving/readme_en.md)
|
||||
- [Mobile](./deploy/lite/readme_en.md)
|
||||
- [Model Quantization](./deploy/slim/quantization/README_en.md)
|
||||
- [Model Compression](./deploy/slim/prune/README_en.md)
|
||||
- [Benchmark](./doc/doc_en/benchmark_en.md)
|
||||
- [Benchmark](./doc/doc_en/benchmark_en.md)
|
||||
- Data Annotation and Synthesis
|
||||
- [Semi-automatic Annotation Tool](./PPOCRLabel/README_en.md)
|
||||
- [Data Annotation Tools](./doc/doc_en/data_annotation_en.md)
|
||||
- [Data Synthesis Tools](./doc/doc_en/data_synthesis_en.md)
|
||||
- Datasets
|
||||
- [General OCR Datasets(Chinese/English)](./doc/doc_en/datasets_en.md)
|
||||
- [HandWritten_OCR_Datasets(Chinese)](./doc/doc_en/handwritten_datasets_en.md)
|
||||
- [Various OCR Datasets(multilingual)](./doc/doc_en/vertical_and_multilingual_datasets_en.md)
|
||||
- [Data Annotation Tools](./doc/doc_en/data_annotation_en.md)
|
||||
- [Data Synthesis Tools](./doc/doc_en/data_synthesis_en.md)
|
||||
- [Visualization](#Visualization)
|
||||
- [New language requests](#language_requests)
|
||||
- [FAQ](./doc/doc_en/FAQ_en.md)
|
||||
|
@ -177,3 +180,5 @@ We welcome all the contributions to PaddleOCR and appreciate for your feedback v
|
|||
- Thanks [tangmq](https://gitee.com/tangmq) for contributing Dockerized deployment services to PaddleOCR and supporting the rapid release of callable Restful API services.
|
||||
- Thanks [lijinhan](https://github.com/lijinhan) for contributing a new way, i.e., java SpringBoot, to achieve the request for the Hubserving deployment.
|
||||
- Thanks [Mejans](https://github.com/Mejans) for contributing the Occitan corpus and character set.
|
||||
- Thanks [LKKlein](https://github.com/LKKlein) for contributing a new deployment package written in the Go programming language.
|
||||
- Thanks [Evezerest](https://github.com/Evezerest), [ninetailskim](https://github.com/ninetailskim), [edencfc](https://github.com/edencfc), [BeyondYourself](https://github.com/BeyondYourself) and [1084667371](https://github.com/1084667371) for contributing a new data annotation tool, i.e., PPOCRLabel.
|
||||
|
|
|
@ -4,8 +4,8 @@
|
|||
PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力使用者训练出更好的模型,并应用落地。
|
||||
|
||||
**近期更新**
|
||||
- 2020.11.30 [FAQ](./doc/doc_ch/FAQ.md)新增5个高频问题,共计119个常见问题及解答,并且计划以后每周一都会更新,欢迎大家持续关注。
|
||||
- 2020.11.25 更新半自动标注工具[PPOCRLabel](./PPOCRLabel/README.md),辅助开发者高效完成标注任务,输出格式与PP-OCR训练任务完美衔接。
|
||||
- 2020.11.16 [FAQ](./doc/doc_ch/FAQ.md)新增5个高频问题,共计109个常见问题及解答,并且计划以后每周一都会更新,欢迎大家持续关注。
|
||||
- 2020.9.22 更新PP-OCR技术文章,https://arxiv.org/abs/2009.09941
|
||||
- 2020.9.19 更新超轻量压缩ppocr_mobile_slim系列模型,整体模型3.5M(详见[PP-OCR Pipeline](#PP-OCR)),适合在移动端部署使用。[模型下载](#模型下载)
|
||||
- 2020.9.17 更新超轻量ppocr_mobile系列和通用ppocr_server系列中英文ocr模型,媲美商业效果。[模型下载](#模型下载)
|
||||
|
@ -100,8 +100,8 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力
|
|||
- [效果展示](#效果展示)
|
||||
- FAQ
|
||||
- [【精选】OCR精选10个问题](./doc/doc_ch/FAQ.md)
|
||||
- [【理论篇】OCR通用27个问题](./doc/doc_ch/FAQ.md)
|
||||
- [【实战篇】PaddleOCR实战72个问题](./doc/doc_ch/FAQ.md)
|
||||
- [【理论篇】OCR通用29个问题](./doc/doc_ch/FAQ.md)
|
||||
- [【实战篇】PaddleOCR实战80个问题](./doc/doc_ch/FAQ.md)
|
||||
- [技术交流群](#欢迎加入PaddleOCR技术交流群)
|
||||
- [参考文献](./doc/doc_ch/reference.md)
|
||||
- [许可证书](#许可证书)
|
||||
|
|
|
@ -9,47 +9,42 @@
|
|||
|
||||
## PaddleOCR常见问题汇总(持续更新)
|
||||
|
||||
* [近期更新(2020.11.16)](#近期更新)
|
||||
* [近期更新(2020.11.30)](#近期更新)
|
||||
* [【精选】OCR精选10个问题](#OCR精选10个问题)
|
||||
* [【理论篇】OCR通用27个问题](#OCR通用问题)
|
||||
* [【理论篇】OCR通用29个问题](#OCR通用问题)
|
||||
* [基础知识7题](#基础知识)
|
||||
* [数据集5题](#数据集)
|
||||
* [模型训练调优7题](#模型训练调优)
|
||||
* [预测部署8题](#预测部署)
|
||||
* [【实战篇】PaddleOCR实战72个问题](#PaddleOCR实战问题)
|
||||
* [数据集7题](#数据集2)
|
||||
* [模型训练调优7题](#模型训练调优2)
|
||||
* [预测部署8题](#预测部署2)
|
||||
* [【实战篇】PaddleOCR实战80个问题](#PaddleOCR实战问题)
|
||||
* [使用咨询20题](#使用咨询)
|
||||
* [数据集14题](#数据集)
|
||||
* [模型训练调优21题](#模型训练调优)
|
||||
* [预测部署21题](#预测部署)
|
||||
* [数据集17题](#数据集3)
|
||||
* [模型训练调优21题](#模型训练调优3)
|
||||
* [预测部署22题](#预测部署3)
|
||||
|
||||
|
||||
<a name="近期更新"></a>
|
||||
## 近期更新(2020.11.23)
|
||||
## 近期更新(2020.11.30)
|
||||
|
||||
#### Q3.2.11:有哪些标注工具可以标注OCR数据集?
|
||||
#### Q3.2.15: 文本标注工具PPOCRLabel有什么特色?
|
||||
|
||||
**A**:您可以参考:https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_en/data_annotation_en.md。
|
||||
我们计划推出高效标注OCR数据的标注工具,请您持续关注PaddleOCR的近期更新。
|
||||
**A**: PPOCRLabel是一个半自动文本标注工具,它使用基于PPOCR的中英文OCR模型,预先预测文本检测和识别结果,然后用户对上述结果进行校验和修正就行,大大提高用户的标注效率。同时导出的标注结果直接适配PPOCR训练所需要的数据格式。
|
||||
|
||||
#### Q3.2.12:一些特殊场景的数据识别效果差,但是数据量很少,不够用来finetune怎么办?
|
||||
#### Q3.2.16: 文本标注工具PPOCRLabel,可以更换模型吗?
|
||||
|
||||
**A**:您可以合成一些接近使用场景的数据用于训练。
|
||||
我们计划推出基于特定场景的文本数据合成工具,请您持续关注PaddleOCR的近期更新。
|
||||
**A**: PPOCRLabel中OCR部署方式采用的基于pip安装whl包快速推理,可以参考相关文档更换模型路径,进行特定任务的标注适配。基于pip安装whl包快速推理的文档如下,https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_ch/whl.md。
|
||||
|
||||
#### Q3.2.13:特殊字符(例如一些标点符号)识别效果不好怎么办?
|
||||
#### Q3.2.17: 文本标注工具PPOCRLabel支持的运行环境有哪些?
|
||||
|
||||
**A**:首先请您确认要识别的特殊字符是否在字典中。
|
||||
如果字符已经在字典中但效果依然不好,可能是由于识别数据较少导致的,您可以增加相应数据finetune模型。
|
||||
**A**: PPOCRLabel可运行于Linux、Windows、MacOS等多种系统。操作步骤可以参考文档,https://github.com/PaddlePaddle/PaddleOCR/blob/develop/PPOCRLabel/README.md
|
||||
|
||||
#### Q3.2.14:PaddleOCR可以识别灰度图吗?
|
||||
#### Q2.2.6: 当训练数据量少时,如何获取更多的数据?
|
||||
|
||||
**A**:PaddleOCR的模型均为三通道输入。如果您想使用灰度图作为输入,建议直接用3通道的模式读入灰度图,
|
||||
或者将单通道图像转换为三通道图像再识别。例如,opencv的cvtColor函数就可以将灰度图转换为RGB三通道模式。
|
||||
**A**: 当训练数据量少时,可以尝试以下三种方式获取更多的数据:(1)人工采集更多的训练数据,最直接也是最有效的方式。(2)基于PIL和opencv基本图像处理或者变换。例如PIL中ImageFont, Image, ImageDraw三个模块将文字写到背景中,opencv的旋转仿射变换,高斯滤波等。(3)利用数据生成算法合成数据,例如pix2pix等算法。
|
||||
|
||||
#### Q3.1.20:PaddleOCR与百度的其他OCR产品有什么区别?
|
||||
#### Q2.2.7: 论文《Editing Text in the Wild》中文本合成方法SRNet有什么特点?
|
||||
|
||||
**A**:PaddleOCR主要聚焦通用ocr,如果有垂类需求,您可以用PaddleOCR+垂类数据自己训练;
|
||||
如果缺少带标注的数据,或者不想投入研发成本,建议直接调用开放的API,开放的API覆盖了目前比较常见的一些垂类。
|
||||
**A**: SRNet是借鉴GAN中图像到图像转换、风格迁移的想法合成文本数据。不同于通用GAN的方法只选择一个分支,SRNet将文本合成任务分解为三个简单的子模块,提升合成数据的效果。这三个子模块为不带背景的文本风格迁移模块、背景抽取模块和融合模块。PaddleOCR计划将在2020年12月中旬开源基于SRNet的实用模型。
|
||||
|
||||
|
||||
<a name="OCR精选10个问题"></a>
|
||||
|
@ -141,6 +136,8 @@
|
|||
|
||||
<a name="OCR通用问题"></a>
|
||||
## 【理论篇】OCR通用问题
|
||||
|
||||
<a name="基础知识"></a>
|
||||
### 基础知识
|
||||
|
||||
#### Q2.1.1:CRNN能否识别两行的文字?还是说必须一行?
|
||||
|
@ -169,6 +166,7 @@
|
|||
|
||||
**A**:处理字符的时候,把多字符的当作一个字就行,字典中每行是一个字。
|
||||
|
||||
<a name="数据集2"></a>
|
||||
### 数据集
|
||||
|
||||
#### Q2.2.1:支持空格的模型,标注数据的时候是不是要标注空格?中间几个空格都要标注出来么?
|
||||
|
@ -191,7 +189,15 @@
|
|||
|
||||
**A**:使用基于分割的方法,如DB,检测密集文本行时,最好收集一批数据进行训练,并且在训练时将生成二值图像的shrink_ratio参数调小一些。
|
||||
|
||||
#### Q2.2.6: 当训练数据量少时,如何获取更多的数据?
|
||||
|
||||
**A**: 当训练数据量少时,可以尝试以下三种方式获取更多的数据:(1)人工采集更多的训练数据,最直接也是最有效的方式。(2)基于PIL和opencv基本图像处理或者变换。例如PIL中ImageFont, Image, ImageDraw三个模块将文字写到背景中,opencv的旋转仿射变换,高斯滤波等。(3)利用数据生成算法合成数据,例如pix2pix等算法。
|
||||
|
||||
#### Q2.2.7: 论文《Editing Text in the Wild》中文本合成方法SRNet有什么特点?
|
||||
|
||||
**A**: SRNet是借鉴GAN中图像到图像转换、风格迁移的想法合成文本数据。不同于通用GAN的方法只选择一个分支,SRNet将文本合成任务分解为三个简单的子模块,提升合成数据的效果。这三个子模块为不带背景的文本风格迁移模块、背景抽取模块和融合模块。PaddleOCR计划将在2020年12月中旬开源基于SRNet的实用模型。
|
||||
|
||||
<a name="模型训练调优2"></a>
|
||||
### 模型训练调优
|
||||
|
||||
#### Q2.3.1:如何更换文本检测/识别的backbone?
|
||||
|
@ -233,6 +239,7 @@
|
|||
|
||||
(2)调大系统的[l2 decay值](https://github.com/PaddlePaddle/PaddleOCR/blob/a501603d54ff5513fc4fc760319472e59da25424/configs/rec/ch_ppocr_v1.1/rec_chinese_lite_train_v1.1.yml#L47)
|
||||
|
||||
<a name="预测部署2"></a>
|
||||
### 预测部署
|
||||
|
||||
#### Q2.4.1:请问对于图片中的密集文字,有什么好的处理办法吗?
|
||||
|
@ -280,6 +287,7 @@
|
|||
<a name="PaddleOCR实战问题"></a>
|
||||
## 【实战篇】PaddleOCR实战问题
|
||||
|
||||
<a name="使用咨询"></a>
|
||||
### 使用咨询
|
||||
|
||||
#### Q3.1.1:OSError: [WinError 126] 找不到指定的模块。mac pro python 3.4 shapely import 问题
|
||||
|
@ -377,7 +385,7 @@
|
|||
**A**:PaddleOCR主要聚焦通用ocr,如果有垂类需求,您可以用PaddleOCR+垂类数据自己训练;
|
||||
如果缺少带标注的数据,或者不想投入研发成本,建议直接调用开放的API,开放的API覆盖了目前比较常见的一些垂类。
|
||||
|
||||
|
||||
<a name="数据集3"></a>
|
||||
### 数据集
|
||||
|
||||
#### Q3.2.1:如何制作PaddleOCR支持的数据格式
|
||||
|
@ -456,7 +464,19 @@
|
|||
**A**:PaddleOCR的模型均为三通道输入。如果您想使用灰度图作为输入,建议直接用3通道的模式读入灰度图,
|
||||
或者将单通道图像转换为三通道图像再识别。例如,opencv的cvtColor函数就可以将灰度图转换为RGB三通道模式。
|
||||
|
||||
#### Q3.2.15: 文本标注工具PPOCRLabel有什么特色?
|
||||
|
||||
**A**: PPOCRLabel是一个半自动文本标注工具,它使用基于PPOCR的中英文OCR模型,预先预测文本检测和识别结果,然后用户对上述结果进行校验和修正就行,大大提高用户的标注效率。同时导出的标注结果直接适配PPOCR训练所需要的数据格式。
|
||||
|
||||
#### Q3.2.16: 文本标注工具PPOCRLabel,可以更换模型吗?
|
||||
|
||||
**A**: PPOCRLabel中OCR部署方式采用的基于pip安装whl包快速推理,可以参考相关文档更换模型路径,进行特定任务的标注适配。基于pip安装whl包快速推理的文档如下,https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_ch/whl.md。
|
||||
|
||||
#### Q3.2.17: 文本标注工具PPOCRLabel支持的运行环境有哪些?
|
||||
|
||||
**A**: PPOCRLabel可运行于Linux、Windows、MacOS等多种系统。操作步骤可以参考文档,https://github.com/PaddlePaddle/PaddleOCR/blob/develop/PPOCRLabel/README.md
|
||||
|
||||
<a name="模型训练调优3"></a>
|
||||
### 模型训练调优
|
||||
|
||||
#### Q3.3.1:文本长度超过25,应该怎么处理?
|
||||
|
@ -574,7 +594,7 @@ return paddle.reader.multiprocess_reader(readers, False, queue_size=320)
|
|||
|
||||
(3)在训练的时候,文本长度超过25的训练图像都会被丢弃,因此需要看下真正参与训练的图像有多少,太少的话也容易过拟合。
|
||||
|
||||
|
||||
<a name="预测部署3"></a>
|
||||
### 预测部署
|
||||
|
||||
#### Q3.4.1:如何pip安装opt模型转换工具?
|
||||
|
|
|
@ -4,6 +4,8 @@
|
|||
|
||||
### 安装whl包
|
||||
|
||||
首先需要参照[安装文档](installation.md)安装paddlepaddle,然后开始安装paddleocr package
|
||||
|
||||
pip安装
|
||||
```bash
|
||||
pip install paddleocr
|
||||
|
@ -166,7 +168,7 @@ paddleocr -h
|
|||
|
||||
* 检测+分类+识别全流程
|
||||
```bash
|
||||
paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --use_angle_cls true --cls true
|
||||
paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --use_angle_cls true
|
||||
```
|
||||
结果是一个list,每个item包含了文本框,文字和识别置信度
|
||||
```bash
|
||||
|
@ -190,7 +192,7 @@ paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg
|
|||
|
||||
* 分类+识别
|
||||
```bash
|
||||
paddleocr --image_dir PaddleOCR/doc/imgs_words/ch/word_1.jpg --use_angle_cls true --cls true --det false
|
||||
paddleocr --image_dir PaddleOCR/doc/imgs_words/ch/word_1.jpg --use_angle_cls true --det false
|
||||
```
|
||||
|
||||
结果是一个list,每个item只包含识别结果和识别置信度
|
||||
|
@ -222,7 +224,7 @@ paddleocr --image_dir PaddleOCR/doc/imgs_words/ch/word_1.jpg --det false
|
|||
|
||||
* 单独执行分类
|
||||
```bash
|
||||
paddleocr --image_dir PaddleOCR/doc/imgs_words/ch/word_1.jpg --use_angle_cls true --cls true --det false --rec false
|
||||
paddleocr --image_dir PaddleOCR/doc/imgs_words/ch/word_1.jpg --use_angle_cls true --det false --rec false
|
||||
```
|
||||
|
||||
结果是一个list,每个item只包含分类结果和分类置信度
|
||||
|
@ -258,7 +260,7 @@ im_show.save('result.jpg')
|
|||
### 通过命令行使用
|
||||
|
||||
```bash
|
||||
paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --det_model_dir {your_det_model_dir} --rec_model_dir {your_rec_model_dir} --rec_char_dict_path {your_rec_char_dict_path} --cls_model_dir {your_cls_model_dir} --use_angle_cls true --cls true
|
||||
paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --det_model_dir {your_det_model_dir} --rec_model_dir {your_rec_model_dir} --rec_char_dict_path {your_rec_char_dict_path} --cls_model_dir {your_cls_model_dir} --use_angle_cls true
|
||||
```
|
||||
|
||||
## 参数说明
|
||||
|
@ -295,4 +297,4 @@ paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --det_model_dir {your_det_model_
|
|||
| lang | 模型语言类型,目前支持 中文(ch)和英文(en) | ch |
|
||||
| det | 前向时是否启动检测 | TRUE |
|
||||
| rec | 前向时是否启动识别 | TRUE |
|
||||
| cls | 前向时是否启动分类 | FALSE |
|
||||
| cls | 前向时是否启动分类, 此参数仅存在于`代码使用`模式 | FALSE |
|
||||
|
|
|
@ -2,6 +2,9 @@
|
|||
|
||||
## Get started quickly
|
||||
### install package
|
||||
|
||||
First, you need to refer to [installation document](installation_en.md) to install paddlepaddle, and then start to install paddleocr package
|
||||
|
||||
install by pypi
|
||||
```bash
|
||||
pip install paddleocr
|
||||
|
@ -172,7 +175,7 @@ paddleocr -h
|
|||
|
||||
* detection classification and recognition
|
||||
```bash
|
||||
paddleocr --image_dir PaddleOCR/doc/imgs_en/img_12.jpg --use_angle_cls true -cls true --lang en
|
||||
paddleocr --image_dir PaddleOCR/doc/imgs_en/img_12.jpg --use_angle_cls true --lang en
|
||||
```
|
||||
|
||||
Output will be a list, each item contains bounding box, text and recognition confidence
|
||||
|
@ -198,7 +201,7 @@ Output will be a list, each item contains bounding box, text and recognition con
|
|||
|
||||
* classification and recognition
|
||||
```bash
|
||||
paddleocr --image_dir PaddleOCR/doc/imgs_words_en/word_10.png --use_angle_cls true -cls true --det false --lang en
|
||||
paddleocr --image_dir PaddleOCR/doc/imgs_words_en/word_10.png --use_angle_cls true --det false --lang en
|
||||
```
|
||||
|
||||
Output will be a list, each item contains text and recognition confidence
|
||||
|
@ -221,7 +224,7 @@ Output will be a list, each item only contains bounding box
|
|||
|
||||
* only recognition
|
||||
```bash
|
||||
paddleocr --image_dir PaddleOCR/doc/imgs_words_en/word_10.png --det false --cls false --lang en
|
||||
paddleocr --image_dir PaddleOCR/doc/imgs_words_en/word_10.png --det false --lang en
|
||||
```
|
||||
|
||||
Output will be a list, each item contains text and recognition confidence
|
||||
|
@ -231,7 +234,7 @@ Output will be a list, each item contains text and recognition confidence
|
|||
|
||||
* only classification
|
||||
```bash
|
||||
paddleocr --image_dir PaddleOCR/doc/imgs_words_en/word_10.png --use_angle_cls true -cls true --det false --rec false
|
||||
paddleocr --image_dir PaddleOCR/doc/imgs_words_en/word_10.png --use_angle_cls true --det false --rec false
|
||||
```
|
||||
|
||||
Output will be a list, each item contains classification result and confidence
|
||||
|
@ -268,7 +271,7 @@ im_show.save('result.jpg')
|
|||
### Use by command line
|
||||
|
||||
```bash
|
||||
paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --det_model_dir {your_det_model_dir} --rec_model_dir {your_rec_model_dir} --rec_char_dict_path {your_rec_char_dict_path} --cls_model_dir {your_cls_model_dir} --use_angle_cls true --cls true
|
||||
paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --det_model_dir {your_det_model_dir} --rec_model_dir {your_rec_model_dir} --rec_char_dict_path {your_rec_char_dict_path} --cls_model_dir {your_cls_model_dir} --use_angle_cls true
|
||||
```
|
||||
|
||||
## Parameter Description
|
||||
|
@ -305,4 +308,4 @@ paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --det_model_dir {your_det_model_
|
|||
| lang | The support language, now only Chinese(ch)、English(en)、French(french)、German(german)、Korean(korean)、Japanese(japan) are supported | ch |
|
||||
| det | Enable detection when `ppocr.ocr` func exec | TRUE |
|
||||
| rec | Enable recognition when `ppocr.ocr` func exec | TRUE |
|
||||
| cls | Enable classification when `ppocr.ocr` func exec | FALSE |
|
||||
| cls | Enable classification when `ppocr.ocr` func exec; this parameter only exists in `code usage` mode | FALSE |
|
||||
|
|
BIN
doc/joinus.PNG
Before Width: | Height: | Size: 225 KiB After Width: | Height: | Size: 415 KiB |
|
@ -160,6 +160,7 @@ class RecModel(object):
|
|||
"We set img_shape to be the same , it may affect the inference effect"
|
||||
)
|
||||
image_shape = deepcopy(self.image_shape)
|
||||
image_shape.insert(0, -1)
|
||||
image = fluid.data(name='image', shape=image_shape, dtype='float32')
|
||||
image.stop_gradient = False
|
||||
if self.loss_type == "srn":
|
||||
|
|
|
@ -4,4 +4,4 @@ pyclipper
|
|||
lmdb
|
||||
tqdm
|
||||
numpy
|
||||
opencv-python
|
||||
opencv-python==4.2.0.32
|
||||
|
|
|
@ -0,0 +1,337 @@
|
|||
# PaddleOCR-GO
|
||||
|
||||
本服务是[PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR)的golang部署版本。
|
||||
|
||||
## 1. 环境准备
|
||||
|
||||
### 运行环境
|
||||
|
||||
- go: 1.14
|
||||
- OpenCV: 4.3.0
|
||||
- PaddlePaddle: 1.8.4
|
||||
- 编译环境:cmake 3.15.4 | gcc 4.8.5
|
||||
- 基于Centos 7.4运行环境编译,Windows请自行解决`OpenCV`和`PaddlePaddle`的编译问题
|
||||
|
||||
*另外,以下编译以`.bashrc`个人环境变量配置文件为例,如果使用`zsh`,请自行更换为`.zshrc`*
|
||||
|
||||
### 1.1 安装golang
|
||||
|
||||
从官网下载[golang](https://golang.org/dl/),建议选择1.13版本以上进行安装。下载完成后,直接解压到你需要的安装目录,并配置相关环境变量,此处以1.14版本为例。
|
||||
|
||||
```shell
|
||||
# 下载golang
|
||||
wget https://golang.org/dl/go1.14.10.linux-amd64.tar.gz
|
||||
|
||||
# 解压到 /usr/local 目录下
|
||||
tar -xzvf go1.14.10.linux-amd64.tar.gz -C /usr/local
|
||||
|
||||
# 配置GOROOT,即go的安装目录
|
||||
echo "export GOROOT=/usr/local/go" >> ~/.bashrc
|
||||
echo 'export PATH=$PATH:$GOROOT/bin' >> ~/.bashrc
|
||||
# 配置GOPATH,即go相关package的安装目录,可自定义一个目录
|
||||
echo "export GOPATH=$HOME/golang" >> ~/.bashrc
|
||||
echo 'export PATH=$PATH:$GOPATH/bin' >> ~/.bashrc
|
||||
# 配置GOPROXY,即go mod包管理器的下载代理,同时打开mod模式
|
||||
echo "export GO111MODULE=on" >> ~/.bashrc
|
||||
echo "export GOPROXY=https://mirrors.aliyun.com/goproxy/" >> ~/.bashrc
|
||||
source ~/.bashrc
|
||||
```
|
||||
|
||||
### 1.2 编译OpenCV库
|
||||
|
||||
go语言中,OpenCV的使用主要以[gocv](https://github.com/hybridgroup/gocv)包为主,gocv使用cgo调用OpenCV提供接口,因此还是需要编译OpenCV库。
|
||||
|
||||
**踩坑指南之一:[gocv官方实现](https://github.com/hybridgroup/gocv)中,部分接口并没有与原版C++的OpenCV的API保持一致,导致图片处理结果会出现一定的数值偏差。为处理这种偏差,[该仓库](https://github.com/LKKlein/gocv)fork了一份gocv官方源码,并对部分这些不一致的API进行了修正,保证结果与其他语言的一致性。**
|
||||
|
||||
对于OpenCV的编译,gocv官方提供了[Makefile](https://github.com/LKKlein/gocv/blob/lk/Makefile),可以一键进行安装,具体安装步骤详见[官方指南](https://github.com/LKKlein/gocv/blob/lk/README_ORIGIN.md#ubuntulinux)。
|
||||
|
||||
这里提供逐步安装的方式,方便排查错误。
|
||||
|
||||
- 下载并解压OpenCV-4.3.0和OpenCV-Contrib-4.3.0
|
||||
|
||||
```shell
|
||||
# 创建opencv安装目录
|
||||
mkdir -p ~/opencv
|
||||
|
||||
# 下载OpenCV
|
||||
cd ~/opencv
|
||||
curl -sL https://github.com/opencv/opencv/archive/4.3.0.zip > opencv.zip
|
||||
unzip -q opencv.zip
|
||||
rm -rf opencv.zip
|
||||
|
||||
# 下载OpenCV-Contrib
|
||||
curl -sL https://github.com/opencv/opencv_contrib/archive/4.3.0.zip > opencv-contrib.zip
|
||||
unzip -q opencv-contrib.zip
|
||||
rm -rf opencv-contrib.zip
|
||||
```
|
||||
|
||||
- 安装相关依赖
|
||||
|
||||
```shell
|
||||
sudo yum -y install pkgconfig cmake git gtk2-devel libpng-devel libjpeg-devel libtiff-devel tbb tbb-devel libdc1394-devel
|
||||
```
|
||||
|
||||
- 编译安装
|
||||
|
||||
```shell
|
||||
mkdir -p ~/.local/opencv-4.3.0
|
||||
cd ~/opencv/opencv-4.3.0
|
||||
mkdir build
|
||||
cd build
|
||||
cmake -D WITH_IPP=OFF \
|
||||
-D WITH_OPENGL=OFF \
|
||||
-D WITH_QT=OFF \
|
||||
-D BUILD_EXAMPLES=OFF \
|
||||
-D BUILD_TESTS=OFF \
|
||||
-D BUILD_PERF_TESTS=OFF \
|
||||
-D BUILD_opencv_java=OFF \
|
||||
-D BUILD_opencv_python=OFF \
|
||||
-D BUILD_opencv_python2=OFF \
|
||||
-D BUILD_opencv_python3=OFF \
|
||||
-D OPENCV_GENERATE_PKGCONFIG=ON \
|
||||
-D CMAKE_INSTALL_PREFIX=$HOME/.local/opencv-4.3.0 \
|
||||
-D OPENCV_ENABLE_NONFREE=ON \
|
||||
-D OPENCV_EXTRA_MODULES_PATH=$HOME/opencv/opencv_contrib-4.3.0/modules ..
|
||||
make -j8
|
||||
make install
|
||||
sudo ldconfig
|
||||
```
|
||||
|
||||
make进行编译时,可能出现因`xfeatures2d`的两个模块下载失败导致的编译失败,这里只需要手动下载这部分文件到`$HOME/opencv/opencv_contrib-4.3.0/modules/xfeatures2d/src`目录下,然后重新执行`make -j8`即可。这部分文件地址可参考[这里](https://github.com/opencv/opencv_contrib/issues/1301#issuecomment-447181426)给出的链接。
|
||||
|
||||
- 配置环境变量
|
||||
|
||||
```shell
|
||||
echo "export PKG_CONFIG_PATH=$PKG_CONFIG_PATH:$HOME/.local/opencv-4.3.0/lib64/pkgconfig" >> ~/.bashrc
|
||||
echo "export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$HOME/.local/opencv-4.3.0/lib64" >> ~/.bashrc
|
||||
source ~/.bashrc
|
||||
```
|
||||
|
||||
- 验证安装
|
||||
|
||||
```shell
|
||||
# 安装gocv包,先mod init
|
||||
go mod init opencv
|
||||
go get -u github.com/LKKlein/gocv
|
||||
|
||||
# 验证安装结果
|
||||
cd $GOPATH/pkg/mod/github.com/!l!k!klein/gocv@v0.28.0
|
||||
go run ./cmd/version/main.go
|
||||
|
||||
# 输出
|
||||
# gocv version: 0.28.0
|
||||
# opencv lib version: 4.3.0
|
||||
```
|
||||
|
||||
### 1.3 编译PaddlePaddle的C语言API
|
||||
|
||||
go语言只能通过cgo调用C语言API,而不能直接与C++进行交互,因此需要编译PaddlePaddle的C语言API。当然,也可以自己写C语言调用C++的代码和头文件,这样就可以直接使用PaddlePaddle提供的已编译的C++推理库,无需自己手动编译,详见[该仓库](https://github.com/LKKlein/paddleocr-go/tree/dev_cxx)。
|
||||
|
||||
- 获取PaddlePaddle源代码
|
||||
|
||||
```shell
|
||||
cd ~
|
||||
git clone --recurse-submodules https://github.com/paddlepaddle/paddle
|
||||
|
||||
# 切换到v1.8.4版本
|
||||
cd paddle
|
||||
git checkout v1.8.4
|
||||
|
||||
# 目前版本无论单卡还是多卡都需要先安装nccl
|
||||
git clone https://github.com/NVIDIA/nccl.git
|
||||
make -j8
|
||||
make install
|
||||
```
|
||||
|
||||
- 编译Paddle源代码
|
||||
|
||||
**踩坑指南之二:PaddlePaddle的C语言API实现有一个bug,即获取输入输出变量名时只能获取到第一个模型的变量名,后续模型都无法获取输入输出变量名,进而无法获取到模型输出,详情见[issue](https://github.com/PaddlePaddle/Paddle/issues/28309)。因此,编译前需要手动将`paddle/fluid/inference/capi/pd_predictor.cc`文件中`210行`与`215行`的`static`删除。**
|
||||
|
||||
在处理完该bug之后,才能进行后续编译。相关编译参数见[官方文档](https://www.paddlepaddle.org.cn/documentation/docs/zh/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html#id12),注意部分参数需要相关依赖,请确保依赖完整再启用。
|
||||
|
||||
```shell
|
||||
# 创建c++推理库文件夹
|
||||
mkdir -p ~/paddle_inference
|
||||
export PADDLE_INFER=$HOME/paddle_inference
|
||||
|
||||
# 执行编译
|
||||
export PADDLE_ROOT=`pwd`
|
||||
mkdir build
|
||||
cd build
|
||||
cmake -DFLUID_INFERENCE_INSTALL_DIR=$PADDLE_INFER \
|
||||
-DWITH_CONTRIB=OFF \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DWITH_PYTHON=OFF \
|
||||
-DWITH_MKL=ON \
|
||||
-DWITH_GPU=ON \
|
||||
-DON_INFER=ON \
|
||||
-DWITH_MKLDNN=ON \
|
||||
-DWITH_XBYAK=ON \
|
||||
-DWITH_DSO=OFF ..
|
||||
make
|
||||
make inference_lib_dist
|
||||
```
|
||||
|
||||
编译完成后,可以在`build/fluid_inference_c_install_dir`目录下,看到以下生成的文件
|
||||
|
||||
```
|
||||
build/fluid_inference_c_install_dir
|
||||
├── paddle
|
||||
├── third_party
|
||||
└── version.txt
|
||||
```
|
||||
|
||||
其中`paddle`就是Paddle库的C语言预测API,`version.txt`中包含当前预测库的版本信息。最后,将C推理库配置到环境变量。
|
||||
|
||||
```shell
|
||||
echo "export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PADDLE_ROOT/build/fluid_inference_c_install_dir/paddle/lib" >> ~/.bashrc
|
||||
echo "export LIBRARY_PATH=$LIBRARY_PATH:$PADDLE_ROOT/build/fluid_inference_c_install_dir/paddle/lib" >> ~/.bashrc
|
||||
|
||||
source ~/.bashrc
|
||||
```
|
||||
|
||||
## 2. paddleocr-go预测库
|
||||
|
||||
### 2.1 安装paddleocr-go
|
||||
|
||||
确保C推理库已配置到环境变量,然后直接执行安装命令
|
||||
|
||||
```shell
|
||||
go get -u github.com/PaddlePaddle/PaddleOCR/thirdparty/paddleocr-go
|
||||
```
|
||||
|
||||
### 2.2 相关使用API
|
||||
|
||||
在go中使用import引入包
|
||||
|
||||
```go
|
||||
import "github.com/PaddlePaddle/PaddleOCR/thirdparty/paddleocr-go/ocr"
|
||||
```
|
||||
|
||||
- 预测结果结构体
|
||||
|
||||
```go
|
||||
type OCRText struct {
|
||||
BBox [][]int `json:"bbox"`
|
||||
Text string `json:"text"`
|
||||
Score float64 `json:"score"`
|
||||
}
|
||||
```
|
||||
|
||||
一张图的OCR结果包含多个`OCRText`结果,每个结果包含预测框、预测文本、预测文本得分。
|
||||
|
||||
- OCR预测类
|
||||
|
||||
|
||||
```go
|
||||
func NewOCRSystem(confFile string, a map[string]interface{}) *OCRSystem
|
||||
```
|
||||
|
||||
`OCRSystem`是主要对外提供API的结构;
|
||||
|
||||
`confFile`是yaml配置文件的路径,可在配置文件中修改相关预测参数,也可以传空字符串,这时会全部使用默认配置;
|
||||
|
||||
`a`是可以在代码中直接定义的配置参数,优先级高于配置文件,会覆盖配置文件和默认配置的参数。
|
||||
|
||||
- 单张图预测API
|
||||
|
||||
```go
|
||||
func (ocr *OCRSystem) PredictOneImage(img gocv.Mat) []OCRText
|
||||
```
|
||||
|
||||
|
||||
- 图片文件夹预测API
|
||||
|
||||
```go
|
||||
func (ocr *OCRSystem) PredictDirImages(dirname string) map[string][]OCRText
|
||||
```
|
||||
|
||||
`dirname`是图片文件夹的目录,默认会预测该目录下所有`jpg`和`png`图片,并返回每张图的预测结果。
|
||||
|
||||
- OCR Server
|
||||
|
||||
```go
|
||||
func (ocr *OCRSystem) StartServer(port string)
|
||||
```
|
||||
|
||||
开启OCR预测Server,开启后,使用`post`请求上传需要识别的图片至`http://$ip:$port/ocr`即可直接获取该图片上所有文本的识别结果。其中,`$ip`是开启服务的主机`ip`或`127.0.0.1`的本地ip, `$port`是传入的端口参数。
|
||||
|
||||
|
||||
## 3. 预测demo
|
||||
|
||||
### 3.1 生成预测demo
|
||||
|
||||
以下两种方式均可生成预测demo文件,任选其一即可
|
||||
|
||||
- 通过下载`paddleocr-go`代码并编译
|
||||
|
||||
```shell
|
||||
git clone https://github.com/PaddlePaddle/PaddleOCR
|
||||
cd PaddleOCR/thirdparty/paddleocr-go
|
||||
|
||||
# 确保C动态库路径已在环境变量中,执行以下命令生成ppocr-go文件
|
||||
go build ppocr-go.go
|
||||
```
|
||||
|
||||
- 通过go package自动安装
|
||||
|
||||
```shell
|
||||
# 执行后会自动在$GOPATH/bin下生成ppocr-go文件,如果配置了PATH=$PATH:$GOPATH/bin,以下预测命令可以去掉`./`,直接执行ppocr-go
|
||||
go get -u github.com/PaddlePaddle/PaddleOCR/thirdparty/paddleocr-go
|
||||
```
|
||||
|
||||
### 3.2 修改预测配置
|
||||
|
||||
当前给定的配置文件`config/conf.yaml`中,包含了默认的OCR预测配置参数,可根据个人需要更改相关参数。
|
||||
|
||||
比如,将`use_gpu`改为`false`,使用CPU执行预测;将`det_model_dir`, `rec_model_dir`, `cls_model_dir`都更改为自己的本地模型路径,也或者是更改字典`rec_char_dict_path`的路径,这四个路径如果配置http链接,会自动下载到本地目录。另外,配置参数包含了预测引擎、检测模型、检测阈值、方向分类模型、识别模型及阈值的相关参数,具体参数的意义可参见[PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_ch/whl.md#%E5%8F%82%E6%95%B0%E8%AF%B4%E6%98%8E)。
|
||||
|
||||
### 3.3 执行预测demo
|
||||
|
||||
预测demo提供了三种预测方式,分别是单张图预测、文件夹批量预测、OCR Server预测。三者命令行优先级依次降低。
|
||||
|
||||
#### 3.3.1 单张图预测
|
||||
|
||||
```shell
|
||||
./ppocr-go --config config/conf.yaml --image images/test.jpg
|
||||
```
|
||||
|
||||
执行完成,会输出以下内容:
|
||||
|
||||
<img src="./images/result/single_img_result.jpg" style="zoom:80%;" />
|
||||
|
||||
#### 3.3.2 文件夹批量预测
|
||||
|
||||
```shell
|
||||
./ppocr-go --config config/conf.yaml --image_dir ./images
|
||||
```
|
||||
|
||||
执行完成,会输出以下内容:
|
||||
|
||||
<img src="./images/result/img_dir_result.jpg" style="zoom:80%;" />
|
||||
|
||||
#### 3.3.3 开启OCR Server
|
||||
|
||||
```shell
|
||||
./ppocr-go --use_servering --port=18600
|
||||
```
|
||||
|
||||
开启服务后,可以在其他客户端中通过`post`请求进行ocr预测。此处以`Python`客户端为例,如下所示
|
||||
|
||||
```python
|
||||
import requests
|
||||
|
||||
files = {'image': open('images/test.jpg','rb')}
|
||||
url = "http://127.0.0.1:18600/ocr"
|
||||
|
||||
r = requests.post(url, files=files)
|
||||
print(r.text)
|
||||
```
|
||||
|
||||
执行完成可以得到以下结果
|
||||
|
||||
![](./images/result/python_client_result.jpg)
|
||||
|
||||
最后,在Python中将上述结果可视化可以得到以下结果
|
||||
|
||||
![](./images/result/python_vis_result.jpg)
|
|
@ -0,0 +1,47 @@
|
|||
# params for prediction engine
|
||||
use_gpu: true
|
||||
ir_optim: true
|
||||
enable_mkldnn: false
|
||||
# use_zero_copy_run: true
|
||||
use_tensorrt: false
|
||||
num_cpu_threads: 6
|
||||
gpu_id: 0
|
||||
gpu_mem: 2000
|
||||
|
||||
# params for text detector
|
||||
det_algorithm: "DB"
|
||||
det_model_dir: "https://paddleocr.bj.bcebos.com/20-09-22/mobile/det/ch_ppocr_mobile_v1.1_det_infer.tar"
|
||||
det_max_side_len: 960
|
||||
|
||||
# DB params
|
||||
det_db_thresh: 0.3
|
||||
det_db_box_thresh: 0.5
|
||||
det_db_unclip_ratio: 2.0
|
||||
|
||||
# EAST params
|
||||
det_east_score_thresh: 0.8
|
||||
det_east_cover_thresh: 0.1
|
||||
det_east_nms_thresh: 0.2
|
||||
|
||||
# params for text recognizer
|
||||
rec_algorithm: "CRNN"
|
||||
rec_model_dir: "https://paddleocr.bj.bcebos.com/20-09-22/mobile/rec/ch_ppocr_mobile_v1.1_rec_infer.tar"
|
||||
rec_image_shape: [3, 32, 320]
|
||||
rec_char_type: "ch"
|
||||
rec_batch_num: 30
|
||||
max_text_length: 25
|
||||
rec_char_dict_path: "https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/develop/ppocr/utils/ppocr_keys_v1.txt"
|
||||
use_space_char: true
|
||||
|
||||
# params for text classifier
|
||||
use_angle_cls: false
|
||||
cls_model_dir: "https://paddleocr.bj.bcebos.com/20-09-22/cls/ch_ppocr_mobile_v1.1_cls_infer.tar"
|
||||
cls_image_shape: [3, 48, 192]
|
||||
label_list: ["0", "180"]
|
||||
cls_batch_num: 30
|
||||
cls_thresh: 0.9
|
||||
|
||||
lang: ch
|
||||
det: true
|
||||
rec: true
|
||||
cls: false
|
|
@ -0,0 +1,9 @@
|
|||
module github.com/PaddlePaddle/PaddleOCR/thirdparty/paddleocr-go
|
||||
|
||||
go 1.14
|
||||
|
||||
require (
|
||||
github.com/LKKlein/gocv v0.28.0
|
||||
github.com/ctessum/go.clipper v0.0.0-20200522184404-9c744fa3e86c
|
||||
gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776
|
||||
)
|
|
@ -0,0 +1,8 @@
|
|||
github.com/LKKlein/gocv v0.28.0 h1:1MMvs9uYf+QGPi86it2pUmN8RRoyMnPLUefKB/Jf1Q0=
|
||||
github.com/LKKlein/gocv v0.28.0/go.mod h1:MP408EL7eakRU3vzjsozzfELSX7HDDGdMpWANV1IOHY=
|
||||
github.com/PaddlePaddle/PaddleOCR v1.1.0 h1:zmPevInTs5P+ctSokI9sWQLTThmJBUCo/JCLbB5xbps=
|
||||
github.com/ctessum/go.clipper v0.0.0-20200522184404-9c744fa3e86c h1:VXCsVlam0R2Yl7VET2GxZBPdOa7gFRexyhfWb9v9QtM=
|
||||
github.com/ctessum/go.clipper v0.0.0-20200522184404-9c744fa3e86c/go.mod h1:KRMo3PCsooJP3LmCwKI76dkd7f3ki3zwYLHR7Iwbi5k=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776 h1:tQIYjPdBoyREyB9XMu+nnTclpTYkz2zFM+lzLJFO4gQ=
|
||||
gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
After Width: | Height: | Size: 36 KiB |
After Width: | Height: | Size: 279 KiB |
After Width: | Height: | Size: 98 KiB |
After Width: | Height: | Size: 162 KiB |
After Width: | Height: | Size: 141 KiB |
After Width: | Height: | Size: 48 KiB |
|
@ -0,0 +1,262 @@
|
|||
package ocr
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"image"
|
||||
"image/color"
|
||||
"io"
|
||||
"log"
|
||||
"math"
|
||||
"net/http"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"github.com/LKKlein/gocv"
|
||||
"github.com/PaddlePaddle/PaddleOCR/thirdparty/paddleocr-go/paddle"
|
||||
)
|
||||
|
||||
// PaddleModel holds a Paddle predictor, its input/output tensors and the
// runtime options used to configure it. The option fields are filled in by
// NewPaddleModel; predictor, input and outputs are set by LoadModel.
type PaddleModel struct {
|
||||
// predictor is created by LoadModel from the analysis config.
predictor *paddle.Predictor
|
||||
// input is the first input tensor reported by the predictor.
input *paddle.ZeroCopyTensor
|
||||
// outputs are all output tensors reported by the predictor.
outputs []*paddle.ZeroCopyTensor
|
||||
|
||||
// useGPU is read from the "use_gpu" argument.
useGPU bool
|
||||
// deviceID is read from the "gpu_id" argument.
deviceID int
|
||||
// initGPUMem is read from the "gpu_mem" argument.
initGPUMem int
|
||||
// numThreads is read from the "num_cpu_threads" argument.
numThreads int
|
||||
// useMKLDNN is read from the "enable_mkldnn" argument.
useMKLDNN bool
|
||||
// useTensorRT is read from the "use_tensorrt" argument.
// NOTE(review): LoadModel does not consult this field — confirm whether
// TensorRT support is intentionally not wired up.
useTensorRT bool
|
||||
// useIROptim is read from the "ir_optim" argument.
useIROptim bool
|
||||
}
|
||||
|
||||
func NewPaddleModel(args map[string]interface{}) *PaddleModel {
|
||||
return &PaddleModel{
|
||||
useGPU: getBool(args, "use_gpu", false),
|
||||
deviceID: getInt(args, "gpu_id", 0),
|
||||
initGPUMem: getInt(args, "gpu_mem", 1000),
|
||||
numThreads: getInt(args, "num_cpu_threads", 6),
|
||||
useMKLDNN: getBool(args, "enable_mkldnn", false),
|
||||
useTensorRT: getBool(args, "use_tensorrt", false),
|
||||
useIROptim: getBool(args, "ir_optim", true),
|
||||
}
|
||||
}
|
||||
|
||||
func (model *PaddleModel) LoadModel(modelDir string) {
|
||||
config := paddle.NewAnalysisConfig()
|
||||
config.DisableGlogInfo()
|
||||
|
||||
config.SetModel(modelDir+"/model", modelDir+"/params")
|
||||
if model.useGPU {
|
||||
config.EnableUseGpu(model.initGPUMem, model.deviceID)
|
||||
} else {
|
||||
config.DisableGpu()
|
||||
config.SetCpuMathLibraryNumThreads(model.numThreads)
|
||||
if model.useMKLDNN {
|
||||
config.EnableMkldnn()
|
||||
}
|
||||
}
|
||||
|
||||
// config.EnableMemoryOptim()
|
||||
if model.useIROptim {
|
||||
config.SwitchIrOptim(true)
|
||||
}
|
||||
|
||||
// false for zero copy tensor
|
||||
config.SwitchUseFeedFetchOps(false)
|
||||
config.SwitchSpecifyInputNames(true)
|
||||
|
||||
model.predictor = paddle.NewPredictor(config)
|
||||
model.input = model.predictor.GetInputTensors()[0]
|
||||
model.outputs = model.predictor.GetOutputTensors()
|
||||
}
|
||||
|
||||
// OCRText is one recognised text item of an OCR result; it is serialised to
// JSON with the keys "bbox", "text" and "score".
type OCRText struct {
|
||||
// BBox is the predicted box, one []int per point.
BBox [][]int `json:"bbox"`
|
||||
// Text is the predicted text.
Text string `json:"text"`
|
||||
// Score is the score of the predicted text.
Score float64 `json:"score"`
|
||||
}
|
||||
|
||||
// TextPredictSystem chains the detector, the optional classifier and the
// recognizer that Run applies to an image.
type TextPredictSystem struct {
|
||||
// detector produces the text boxes for an image.
detector *DBDetector
|
||||
// cls is nil unless "use_angle_cls" was enabled in NewTextPredictSystem;
// Run skips classification when it is nil.
cls *TextClassifier
|
||||
// rec turns the cropped images and their boxes into OCRText results.
rec *TextRecognizer
|
||||
}
|
||||
|
||||
func NewTextPredictSystem(args map[string]interface{}) *TextPredictSystem {
|
||||
sys := &TextPredictSystem{
|
||||
detector: NewDBDetector(getString(args, "det_model_dir", ""), args),
|
||||
rec: NewTextRecognizer(getString(args, "rec_model_dir", ""), args),
|
||||
}
|
||||
if getBool(args, "use_angle_cls", false) {
|
||||
sys.cls = NewTextClassifier(getString(args, "cls_model_dir", ""), args)
|
||||
}
|
||||
return sys
|
||||
}
|
||||
|
||||
func (sys *TextPredictSystem) sortBoxes(boxes [][][]int) [][][]int {
|
||||
sort.Slice(boxes, func(i, j int) bool {
|
||||
if boxes[i][0][1] < boxes[j][0][1] {
|
||||
return true
|
||||
}
|
||||
if boxes[i][0][1] > boxes[j][0][1] {
|
||||
return false
|
||||
}
|
||||
return boxes[i][0][0] < boxes[j][0][0]
|
||||
})
|
||||
|
||||
for i := 0; i < len(boxes)-1; i++ {
|
||||
if math.Abs(float64(boxes[i+1][0][1]-boxes[i][0][1])) < 10 && boxes[i+1][0][0] < boxes[i][0][0] {
|
||||
boxes[i], boxes[i+1] = boxes[i+1], boxes[i]
|
||||
}
|
||||
}
|
||||
return boxes
|
||||
}
|
||||
|
||||
func (sys *TextPredictSystem) getRotateCropImage(img gocv.Mat, box [][]int) gocv.Mat {
|
||||
cropW := int(math.Sqrt(math.Pow(float64(box[0][0]-box[1][0]), 2) + math.Pow(float64(box[0][1]-box[1][1]), 2)))
|
||||
cropH := int(math.Sqrt(math.Pow(float64(box[0][0]-box[3][0]), 2) + math.Pow(float64(box[0][1]-box[3][1]), 2)))
|
||||
ptsstd := make([]image.Point, 4)
|
||||
ptsstd[0] = image.Pt(0, 0)
|
||||
ptsstd[1] = image.Pt(cropW, 0)
|
||||
ptsstd[2] = image.Pt(cropW, cropH)
|
||||
ptsstd[3] = image.Pt(0, cropH)
|
||||
|
||||
points := make([]image.Point, 4)
|
||||
points[0] = image.Pt(box[0][0], box[0][1])
|
||||
points[1] = image.Pt(box[1][0], box[1][1])
|
||||
points[2] = image.Pt(box[2][0], box[2][1])
|
||||
points[3] = image.Pt(box[3][0], box[3][1])
|
||||
|
||||
M := gocv.GetPerspectiveTransform(points, ptsstd)
|
||||
defer M.Close()
|
||||
dstimg := gocv.NewMat()
|
||||
gocv.WarpPerspectiveWithParams(img, &dstimg, M, image.Pt(cropW, cropH),
|
||||
gocv.InterpolationCubic, gocv.BorderReplicate, color.RGBA{0, 0, 0, 0})
|
||||
|
||||
if float64(dstimg.Rows()) >= float64(dstimg.Cols())*1.5 {
|
||||
srcCopy := gocv.NewMat()
|
||||
gocv.Transpose(dstimg, &srcCopy)
|
||||
defer dstimg.Close()
|
||||
gocv.Flip(srcCopy, &srcCopy, 0)
|
||||
return srcCopy
|
||||
}
|
||||
return dstimg
|
||||
}
|
||||
|
||||
func (sys *TextPredictSystem) Run(img gocv.Mat) []OCRText {
|
||||
srcimg := gocv.NewMat()
|
||||
defer srcimg.Close()
|
||||
img.CopyTo(&srcimg)
|
||||
boxes := sys.detector.Run(img)
|
||||
if len(boxes) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
boxes = sys.sortBoxes(boxes)
|
||||
cropimages := make([]gocv.Mat, len(boxes))
|
||||
for i := 0; i < len(boxes); i++ {
|
||||
tmpbox := make([][]int, len(boxes[i]))
|
||||
for j := 0; j < len(tmpbox); j++ {
|
||||
tmpbox[j] = make([]int, len(boxes[i][j]))
|
||||
copy(tmpbox[j], boxes[i][j])
|
||||
}
|
||||
cropimg := sys.getRotateCropImage(srcimg, tmpbox)
|
||||
cropimages[i] = cropimg
|
||||
}
|
||||
if sys.cls != nil {
|
||||
cropimages = sys.cls.Run(cropimages)
|
||||
}
|
||||
recResult := sys.rec.Run(cropimages, boxes)
|
||||
return recResult
|
||||
}
|
||||
|
||||
type OCRSystem struct {
|
||||
args map[string]interface{}
|
||||
tps *TextPredictSystem
|
||||
}
|
||||
|
||||
func NewOCRSystem(confFile string, a map[string]interface{}) *OCRSystem {
|
||||
args, err := ReadYaml(confFile)
|
||||
if err != nil {
|
||||
log.Printf("Read config file %v failed! Please check. err: %v\n", confFile, err)
|
||||
log.Println("Program will use default config.")
|
||||
args = defaultArgs
|
||||
}
|
||||
for k, v := range a {
|
||||
args[k] = v
|
||||
}
|
||||
return &OCRSystem{
|
||||
args: args,
|
||||
tps: NewTextPredictSystem(args),
|
||||
}
|
||||
}
|
||||
|
||||
func (ocr *OCRSystem) StartServer(port string) {
|
||||
http.HandleFunc("/ocr", ocr.predictHandler)
|
||||
log.Println("OCR Server has been started on port :", port)
|
||||
err := http.ListenAndServe(":"+port, nil)
|
||||
if err != nil {
|
||||
log.Panicf("http error! error: %v\n", err)
|
||||
}
|
||||
}
|
||||
|
||||
func (ocr *OCRSystem) predictHandler(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != "POST" {
|
||||
w.Write([]byte(errors.New("post method only").Error()))
|
||||
return
|
||||
}
|
||||
r.ParseMultipartForm(32 << 20)
|
||||
var buf bytes.Buffer
|
||||
file, header, err := r.FormFile("image")
|
||||
if err != nil {
|
||||
w.Write([]byte(err.Error()))
|
||||
return
|
||||
}
|
||||
defer file.Close()
|
||||
ext := strings.ToLower(path.Ext(header.Filename))
|
||||
if ext != ".jpg" && ext != ".png" {
|
||||
w.Write([]byte(errors.New("only support image endswith jpg/png").Error()))
|
||||
return
|
||||
}
|
||||
|
||||
io.Copy(&buf, file)
|
||||
img, err2 := gocv.IMDecode(buf.Bytes(), gocv.IMReadColor)
|
||||
defer img.Close()
|
||||
if err2 != nil {
|
||||
w.Write([]byte(err2.Error()))
|
||||
return
|
||||
}
|
||||
result := ocr.PredictOneImage(img)
|
||||
if output, err3 := json.Marshal(result); err3 != nil {
|
||||
w.Write([]byte(err3.Error()))
|
||||
} else {
|
||||
w.Write(output)
|
||||
}
|
||||
}
|
||||
|
||||
func (ocr *OCRSystem) PredictOneImage(img gocv.Mat) []OCRText {
|
||||
return ocr.tps.Run(img)
|
||||
}
|
||||
|
||||
func (ocr *OCRSystem) PredictDirImages(dirname string) map[string][]OCRText {
|
||||
if dirname == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
imgs, _ := filepath.Glob(dirname + "/*.jpg")
|
||||
tmpimgs, _ := filepath.Glob(dirname + "/*.png")
|
||||
imgs = append(imgs, tmpimgs...)
|
||||
results := make(map[string][]OCRText, len(imgs))
|
||||
for i := 0; i < len(imgs); i++ {
|
||||
imgname := imgs[i]
|
||||
img := ReadImage(imgname)
|
||||
defer img.Close()
|
||||
res := ocr.PredictOneImage(img)
|
||||
results[imgname] = res
|
||||
}
|
||||
return results
|
||||
}
|
|
@ -0,0 +1,46 @@
|
|||
package ocr
|
||||
|
||||
// defaultArgs is the configuration used by NewOCRSystem when the config file
// cannot be read. Keys are grouped below by their prefix.
var defaultArgs = map[string]interface{}{
	// Predictor / device keys.
	"use_gpu":         true,
	"ir_optim":        true,
	"enable_mkldnn":   false,
	"use_tensorrt":    false,
	"num_cpu_threads": 6,
	"gpu_id":          0,
	"gpu_mem":         2000,

	// det_* keys.
	"det_algorithm":    "DB",
	"det_model_dir":    "https://paddleocr.bj.bcebos.com/20-09-22/mobile/det/ch_ppocr_mobile_v1.1_det_infer.tar",
	"det_max_side_len": 960,

	"det_db_thresh":       0.3,
	"det_db_box_thresh":   0.5,
	"det_db_unclip_ratio": 2.0,

	"det_east_score_thresh": 0.8,
	"det_east_cover_thresh": 0.1,
	"det_east_nms_thresh":   0.2,

	// rec_* keys and related recognition settings.
	"rec_algorithm":      "CRNN",
	"rec_model_dir":      "https://paddleocr.bj.bcebos.com/20-09-22/mobile/rec/ch_ppocr_mobile_v1.1_rec_infer.tar",
	"rec_image_shape":    []interface{}{3, 32, 320},
	"rec_char_type":      "ch",
	"rec_batch_num":      30,
	"max_text_length":    25,
	"rec_char_dict_path": "https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/develop/ppocr/utils/ppocr_keys_v1.txt",
	"use_space_char":     true,

	// cls_* keys and related classification settings.
	"use_angle_cls":   false,
	"cls_model_dir":   "https://paddleocr.bj.bcebos.com/20-09-22/cls/ch_ppocr_mobile_v1.1_cls_infer.tar",
	"cls_image_shape": []interface{}{3, 48, 192},
	"label_list":      []interface{}{"0", "180"},
	"cls_batch_num":   30,
	"cls_thresh":      0.9,

	// Remaining top-level switches.
	"lang": "ch",
	"det":  true,
	"rec":  true,
	"cls":  false,
}
|
|
@ -0,0 +1,107 @@
|
|||
package ocr
|
||||
|
||||
import (
|
||||
"log"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"github.com/LKKlein/gocv"
|
||||
)
|
||||
|
||||
type TextClassifier struct {
|
||||
*PaddleModel
|
||||
batchNum int
|
||||
thresh float64
|
||||
shape []int
|
||||
labels []string
|
||||
}
|
||||
|
||||
// ClsResult is one classification outcome: the predicted label and the
// score the model assigned to it.
type ClsResult struct {
	// Score is the value read from the probability output at Label.
	Score float32
	// Label is the predicted class index.
	Label int64
}
|
||||
|
||||
func NewTextClassifier(modelDir string, args map[string]interface{}) *TextClassifier {
|
||||
shapes := []int{3, 48, 192}
|
||||
if v, ok := args["cls_image_shape"]; ok {
|
||||
for i, s := range v.([]interface{}) {
|
||||
shapes[i] = s.(int)
|
||||
}
|
||||
}
|
||||
cls := &TextClassifier{
|
||||
PaddleModel: NewPaddleModel(args),
|
||||
batchNum: getInt(args, "cls_batch_num", 30),
|
||||
thresh: getFloat64(args, "cls_thresh", 0.9),
|
||||
shape: shapes,
|
||||
}
|
||||
if checkModelExists(modelDir) {
|
||||
home, _ := os.UserHomeDir()
|
||||
modelDir, _ = downloadModel(home+"/.paddleocr/cls", modelDir)
|
||||
} else {
|
||||
log.Panicf("cls model path: %v not exist! Please check!", modelDir)
|
||||
}
|
||||
cls.LoadModel(modelDir)
|
||||
return cls
|
||||
}
|
||||
|
||||
func (cls *TextClassifier) Run(imgs []gocv.Mat) []gocv.Mat {
|
||||
batch := cls.batchNum
|
||||
var clsTime int64 = 0
|
||||
clsout := make([]ClsResult, len(imgs))
|
||||
srcimgs := make([]gocv.Mat, len(imgs))
|
||||
c, h, w := cls.shape[0], cls.shape[1], cls.shape[2]
|
||||
for i := 0; i < len(imgs); i += batch {
|
||||
j := i + batch
|
||||
if len(imgs) < j {
|
||||
j = len(imgs)
|
||||
}
|
||||
|
||||
normImgs := make([]float32, (j-i)*c*h*w)
|
||||
for k := i; k < j; k++ {
|
||||
tmp := gocv.NewMat()
|
||||
imgs[k].CopyTo(&tmp)
|
||||
srcimgs[k] = tmp
|
||||
img := clsResize(imgs[k], cls.shape)
|
||||
data := normPermute(img, []float32{0.5, 0.5, 0.5}, []float32{0.5, 0.5, 0.5}, 255.0)
|
||||
copy(normImgs[(k-i)*c*h*w:], data)
|
||||
}
|
||||
|
||||
st := time.Now()
|
||||
cls.input.SetValue(normImgs)
|
||||
cls.input.Reshape([]int32{int32(j - i), int32(c), int32(h), int32(w)})
|
||||
|
||||
cls.predictor.SetZeroCopyInput(cls.input)
|
||||
cls.predictor.ZeroCopyRun()
|
||||
cls.predictor.GetZeroCopyOutput(cls.outputs[0])
|
||||
cls.predictor.GetZeroCopyOutput(cls.outputs[1])
|
||||
|
||||
var probout [][]float32
|
||||
var labelout []int64
|
||||
if len(cls.outputs[0].Shape()) == 2 {
|
||||
probout = cls.outputs[0].Value().([][]float32)
|
||||
} else {
|
||||
labelout = cls.outputs[0].Value().([]int64)
|
||||
}
|
||||
|
||||
if len(cls.outputs[1].Shape()) == 2 {
|
||||
probout = cls.outputs[1].Value().([][]float32)
|
||||
} else {
|
||||
labelout = cls.outputs[1].Value().([]int64)
|
||||
}
|
||||
clsTime += int64(time.Since(st).Milliseconds())
|
||||
|
||||
for no, label := range labelout {
|
||||
score := probout[no][label]
|
||||
clsout[i+no] = ClsResult{
|
||||
Score: score,
|
||||
Label: label,
|
||||
}
|
||||
|
||||
if label%2 == 1 && float64(score) > cls.thresh {
|
||||
gocv.Rotate(srcimgs[i+no], &srcimgs[i+no], gocv.Rotate180Clockwise)
|
||||
}
|
||||
}
|
||||
}
|
||||
log.Println("cls num: ", len(clsout), ", cls time elapse: ", clsTime, "ms")
|
||||
return srcimgs
|
||||
}
|
|
@ -0,0 +1,54 @@
|
|||
package ocr
|
||||
|
||||
import (
|
||||
"log"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"github.com/LKKlein/gocv"
|
||||
)
|
||||
|
||||
type DBDetector struct {
|
||||
*PaddleModel
|
||||
preProcess DetPreProcess
|
||||
postProcess DetPostProcess
|
||||
}
|
||||
|
||||
func NewDBDetector(modelDir string, args map[string]interface{}) *DBDetector {
|
||||
maxSideLen := getInt(args, "det_max_side_len", 960)
|
||||
thresh := getFloat64(args, "det_db_thresh", 0.3)
|
||||
boxThresh := getFloat64(args, "det_db_box_thresh", 0.5)
|
||||
unClipRatio := getFloat64(args, "det_db_unclip_ratio", 2.0)
|
||||
|
||||
detector := &DBDetector{
|
||||
PaddleModel: NewPaddleModel(args),
|
||||
preProcess: NewDBProcess(make([]int, 0), maxSideLen),
|
||||
postProcess: NewDBPostProcess(thresh, boxThresh, unClipRatio),
|
||||
}
|
||||
if checkModelExists(modelDir) {
|
||||
home, _ := os.UserHomeDir()
|
||||
modelDir, _ = downloadModel(home+"/.paddleocr/det", modelDir)
|
||||
} else {
|
||||
log.Panicf("det model path: %v not exist! Please check!", modelDir)
|
||||
}
|
||||
detector.LoadModel(modelDir)
|
||||
return detector
|
||||
}
|
||||
|
||||
func (det *DBDetector) Run(img gocv.Mat) [][][]int {
|
||||
oriH := img.Rows()
|
||||
oriW := img.Cols()
|
||||
data, resizeH, resizeW := det.preProcess.Run(img)
|
||||
st := time.Now()
|
||||
det.input.SetValue(data)
|
||||
det.input.Reshape([]int32{1, 3, int32(resizeH), int32(resizeW)})
|
||||
|
||||
det.predictor.SetZeroCopyInput(det.input)
|
||||
det.predictor.ZeroCopyRun()
|
||||
det.predictor.GetZeroCopyOutput(det.outputs[0])
|
||||
|
||||
ratioH, ratioW := float64(resizeH)/float64(oriH), float64(resizeW)/float64(oriW)
|
||||
boxes := det.postProcess.Run(det.outputs[0], oriH, oriW, ratioH, ratioW)
|
||||
log.Println("det_box num: ", len(boxes), ", time elapse: ", time.Since(st))
|
||||
return boxes
|
||||
}
|
|
@ -0,0 +1,131 @@
|
|||
package ocr
|
||||
|
||||
import (
|
||||
"log"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"github.com/LKKlein/gocv"
|
||||
)
|
||||
|
||||
type TextRecognizer struct {
|
||||
*PaddleModel
|
||||
batchNum int
|
||||
textLen int
|
||||
shape []int
|
||||
charType string
|
||||
labels []string
|
||||
}
|
||||
|
||||
func NewTextRecognizer(modelDir string, args map[string]interface{}) *TextRecognizer {
|
||||
shapes := []int{3, 32, 320}
|
||||
if v, ok := args["rec_image_shape"]; ok {
|
||||
for i, s := range v.([]interface{}) {
|
||||
shapes[i] = s.(int)
|
||||
}
|
||||
}
|
||||
home, _ := os.UserHomeDir()
|
||||
labelpath := getString(args, "rec_char_dict_path", home+"/.paddleocr/rec/ppocr_keys_v1.txt")
|
||||
labels := readLines2StringSlice(labelpath)
|
||||
if getBool(args, "use_space_char", true) {
|
||||
labels = append(labels, " ")
|
||||
}
|
||||
rec := &TextRecognizer{
|
||||
PaddleModel: NewPaddleModel(args),
|
||||
batchNum: getInt(args, "rec_batch_num", 30),
|
||||
textLen: getInt(args, "max_text_length", 25),
|
||||
charType: getString(args, "rec_char_type", "ch"),
|
||||
shape: shapes,
|
||||
labels: labels,
|
||||
}
|
||||
if checkModelExists(modelDir) {
|
||||
modelDir, _ = downloadModel(home+"/.paddleocr/rec/ch", modelDir)
|
||||
} else {
|
||||
log.Panicf("rec model path: %v not exist! Please check!", modelDir)
|
||||
}
|
||||
rec.LoadModel(modelDir)
|
||||
return rec
|
||||
}
|
||||
|
||||
func (rec *TextRecognizer) Run(imgs []gocv.Mat, bboxes [][][]int) []OCRText {
|
||||
recResult := make([]OCRText, 0, len(imgs))
|
||||
batch := rec.batchNum
|
||||
var recTime int64 = 0
|
||||
c, h, w := rec.shape[0], rec.shape[1], rec.shape[2]
|
||||
for i := 0; i < len(imgs); i += batch {
|
||||
j := i + batch
|
||||
if len(imgs) < j {
|
||||
j = len(imgs)
|
||||
}
|
||||
|
||||
maxwhratio := 0.0
|
||||
for k := i; k < j; k++ {
|
||||
h, w := imgs[k].Rows(), imgs[k].Cols()
|
||||
ratio := float64(w) / float64(h)
|
||||
if ratio > maxwhratio {
|
||||
maxwhratio = ratio
|
||||
}
|
||||
}
|
||||
|
||||
if rec.charType == "ch" {
|
||||
w = int(32 * maxwhratio)
|
||||
}
|
||||
normimgs := make([]float32, (j-i)*c*h*w)
|
||||
|
||||
for k := i; k < j; k++ {
|
||||
data := crnnPreprocess(imgs[k], rec.shape, []float32{0.5, 0.5, 0.5},
|
||||
[]float32{0.5, 0.5, 0.5}, 255.0, maxwhratio, rec.charType)
|
||||
defer imgs[k].Close()
|
||||
copy(normimgs[(k-i)*c*h*w:], data)
|
||||
}
|
||||
|
||||
st := time.Now()
|
||||
rec.input.SetValue(normimgs)
|
||||
rec.input.Reshape([]int32{int32(j - i), int32(c), int32(h), int32(w)})
|
||||
|
||||
rec.predictor.SetZeroCopyInput(rec.input)
|
||||
rec.predictor.ZeroCopyRun()
|
||||
rec.predictor.GetZeroCopyOutput(rec.outputs[0])
|
||||
rec.predictor.GetZeroCopyOutput(rec.outputs[1])
|
||||
|
||||
recIdxBatch := rec.outputs[0].Value().([][]int64)
|
||||
recIdxLod := rec.outputs[0].Lod()
|
||||
|
||||
predictBatch := rec.outputs[1].Value().([][]float32)
|
||||
predictLod := rec.outputs[1].Lod()
|
||||
recTime += int64(time.Since(st).Milliseconds())
|
||||
|
||||
for rno := 0; rno < len(recIdxLod)-1; rno++ {
|
||||
predIdx := make([]int, 0, 2)
|
||||
for beg := recIdxLod[rno]; beg < recIdxLod[rno+1]; beg++ {
|
||||
predIdx = append(predIdx, int(recIdxBatch[beg][0]))
|
||||
}
|
||||
if len(predIdx) == 0 {
|
||||
continue
|
||||
}
|
||||
words := ""
|
||||
for n := 0; n < len(predIdx); n++ {
|
||||
words += rec.labels[predIdx[n]]
|
||||
}
|
||||
|
||||
score := 0.0
|
||||
count := 0
|
||||
blankPosition := int(rec.outputs[1].Shape()[1])
|
||||
for beg := predictLod[rno]; beg < predictLod[rno+1]; beg++ {
|
||||
argMaxID, maxVal := argmax(predictBatch[beg])
|
||||
if blankPosition-1-argMaxID > 0 {
|
||||
score += float64(maxVal)
|
||||
count++
|
||||
}
|
||||
}
|
||||
score = score / float64(count)
|
||||
recResult = append(recResult, OCRText{
|
||||
BBox: bboxes[i+rno],
|
||||
Text: words,
|
||||
Score: score,
|
||||
})
|
||||
}
|
||||
}
|
||||
log.Println("rec num: ", len(recResult), ", rec time elapse: ", recTime, "ms")
|
||||
return recResult
|
||||
}
|
|
@ -0,0 +1,265 @@
|
|||
package ocr
|
||||
|
||||
import (
|
||||
"image"
|
||||
"image/color"
|
||||
"math"
|
||||
"sort"
|
||||
|
||||
"github.com/LKKlein/gocv"
|
||||
"github.com/PaddlePaddle/PaddleOCR/thirdparty/paddleocr-go/paddle"
|
||||
clipper "github.com/ctessum/go.clipper"
|
||||
)
|
||||
|
||||
// xFloatSortBy implements sort.Interface for float32 rows, ordering them by
// their first element.
type xFloatSortBy [][]float32

// Len reports the number of rows.
func (rows xFloatSortBy) Len() int {
	return len(rows)
}

// Swap exchanges the rows at positions i and j.
func (rows xFloatSortBy) Swap(i, j int) {
	rows[j], rows[i] = rows[i], rows[j]
}

// Less reports whether row i has a smaller first element than row j.
func (rows xFloatSortBy) Less(i, j int) bool {
	return rows[i][0] < rows[j][0]
}
|
||||
|
||||
// xIntSortBy implements sort.Interface for int rows, ordering them by their
// first element.
type xIntSortBy [][]int

// Len reports the number of rows.
func (rows xIntSortBy) Len() int {
	return len(rows)
}

// Swap exchanges the rows at positions i and j.
func (rows xIntSortBy) Swap(i, j int) {
	rows[j], rows[i] = rows[i], rows[j]
}

// Less reports whether row i has a smaller first element than row j.
func (rows xIntSortBy) Less(i, j int) bool {
	return rows[i][0] < rows[j][0]
}
|
||||
|
||||
type DetPostProcess interface {
|
||||
Run(output *paddle.ZeroCopyTensor, oriH, oriW int, ratioH, ratioW float64) [][][]int
|
||||
}
|
||||
|
||||
// DBPostProcess holds the thresholds and limits used to turn a DB
// prediction map into boxes.
type DBPostProcess struct {
	// thresh is the value a prediction must exceed to be set in the bitmap.
	thresh float64
	// boxThresh is the minimum box score for a candidate to be kept.
	boxThresh float64
	// maxCandidates caps how many contours are examined.
	maxCandidates int
	// unClipRatio scales the offset distance used when expanding a box.
	unClipRatio float64
	// minSize is the minimum side length of a candidate rectangle.
	minSize int
}
|
||||
|
||||
func NewDBPostProcess(thresh, boxThresh, unClipRatio float64) *DBPostProcess {
|
||||
return &DBPostProcess{
|
||||
thresh: thresh,
|
||||
boxThresh: boxThresh,
|
||||
unClipRatio: unClipRatio,
|
||||
maxCandidates: 1000,
|
||||
minSize: 3,
|
||||
}
|
||||
}
|
||||
|
||||
func (d *DBPostProcess) getMinBoxes(rect gocv.RotatedRect) [][]float32 {
|
||||
points := gocv.NewMat()
|
||||
gocv.BoxPoints(rect, &points)
|
||||
defer points.Close()
|
||||
array := d.mat2slice(points)
|
||||
sort.Sort(xFloatSortBy(array))
|
||||
|
||||
point1, point2, point3, point4 := array[0], array[1], array[2], array[3]
|
||||
if array[3][1] <= array[2][1] {
|
||||
point2, point3 = array[3], array[2]
|
||||
} else {
|
||||
point2, point3 = array[2], array[3]
|
||||
}
|
||||
|
||||
if array[1][1] <= array[0][1] {
|
||||
point1, point4 = array[1], array[0]
|
||||
} else {
|
||||
point1, point4 = array[0], array[1]
|
||||
}
|
||||
|
||||
array = [][]float32{point1, point2, point3, point4}
|
||||
return array
|
||||
}
|
||||
|
||||
func (d *DBPostProcess) mat2slice(mat gocv.Mat) [][]float32 {
|
||||
array := make([][]float32, mat.Rows())
|
||||
for i := 0; i < mat.Rows(); i++ {
|
||||
tmp := make([]float32, mat.Cols())
|
||||
for j := 0; j < mat.Cols(); j++ {
|
||||
tmp[j] = mat.GetFloatAt(i, j)
|
||||
}
|
||||
array[i] = tmp
|
||||
}
|
||||
return array
|
||||
}
|
||||
|
||||
func (d *DBPostProcess) boxScoreFast(array [][]float32, pred gocv.Mat) float64 {
|
||||
height, width := pred.Rows(), pred.Cols()
|
||||
boxX := []float32{array[0][0], array[1][0], array[2][0], array[3][0]}
|
||||
boxY := []float32{array[0][1], array[1][1], array[2][1], array[3][1]}
|
||||
|
||||
xmin := clip(int(math.Floor(float64(minf(boxX)))), 0, width-1)
|
||||
xmax := clip(int(math.Ceil(float64(maxf(boxX)))), 0, width-1)
|
||||
ymin := clip(int(math.Floor(float64(minf(boxY)))), 0, height-1)
|
||||
ymax := clip(int(math.Ceil(float64(maxf(boxY)))), 0, height-1)
|
||||
|
||||
mask := gocv.NewMatWithSize(ymax-ymin+1, xmax-xmin+1, gocv.MatTypeCV8UC1)
|
||||
defer mask.Close()
|
||||
ppt := make([][]image.Point, 1)
|
||||
ppt[0] = make([]image.Point, 4)
|
||||
ppt[0][0] = image.Point{int(array[0][0]) - xmin, int(array[0][1]) - ymin}
|
||||
ppt[0][1] = image.Point{int(array[1][0]) - xmin, int(array[1][1]) - ymin}
|
||||
ppt[0][2] = image.Point{int(array[2][0]) - xmin, int(array[2][1]) - ymin}
|
||||
ppt[0][3] = image.Point{int(array[3][0]) - xmin, int(array[3][1]) - ymin}
|
||||
gocv.FillPoly(&mask, ppt, color.RGBA{0, 0, 1, 0})
|
||||
croppedImg := pred.Region(image.Rect(xmin, ymin, xmax+1, ymax+1))
|
||||
s := croppedImg.MeanWithMask(mask)
|
||||
return s.Val1
|
||||
}
|
||||
|
||||
func (d *DBPostProcess) unClip(box [][]float32) gocv.RotatedRect {
|
||||
var area, dist float64
|
||||
for i := 0; i < 4; i++ {
|
||||
area += float64(box[i][0]*box[(i+1)%4][1] - box[i][1]*box[(i+1)%4][0])
|
||||
dist += math.Sqrt(float64(
|
||||
(box[i][0]-box[(i+1)%4][0])*(box[i][0]-box[(i+1)%4][0]) +
|
||||
(box[i][1]-box[(i+1)%4][1])*(box[i][1]-box[(i+1)%4][1]),
|
||||
))
|
||||
}
|
||||
area = math.Abs(area / 2.0)
|
||||
distance := area * d.unClipRatio / dist
|
||||
offset := clipper.NewClipperOffset()
|
||||
path := make([]*clipper.IntPoint, 4)
|
||||
path[0] = &clipper.IntPoint{X: clipper.CInt(box[0][0]), Y: clipper.CInt(box[0][1])}
|
||||
path[1] = &clipper.IntPoint{X: clipper.CInt(box[1][0]), Y: clipper.CInt(box[1][1])}
|
||||
path[2] = &clipper.IntPoint{X: clipper.CInt(box[2][0]), Y: clipper.CInt(box[2][1])}
|
||||
path[3] = &clipper.IntPoint{X: clipper.CInt(box[3][0]), Y: clipper.CInt(box[3][1])}
|
||||
offset.AddPath(clipper.Path(path), clipper.JtRound, clipper.EtClosedPolygon)
|
||||
soln := offset.Execute(distance)
|
||||
|
||||
points := make([]image.Point, 0, 4)
|
||||
for i := 0; i < len(soln); i++ {
|
||||
for j := 0; j < len(soln[i]); j++ {
|
||||
points = append(points, image.Point{int(soln[i][j].X), int(soln[i][j].Y)})
|
||||
}
|
||||
}
|
||||
|
||||
var res gocv.RotatedRect
|
||||
if len(points) <= 0 {
|
||||
points = make([]image.Point, 4)
|
||||
points[0] = image.Pt(0, 0)
|
||||
points[1] = image.Pt(1, 0)
|
||||
points[2] = image.Pt(1, 1)
|
||||
points[3] = image.Pt(0, 1)
|
||||
res = gocv.RotatedRect{
|
||||
Contour: points,
|
||||
BoundingRect: image.Rect(0, 0, 1, 1),
|
||||
Center: gocv.Point2f{X: 0.5, Y: 0.5},
|
||||
Width: 1,
|
||||
Height: 1,
|
||||
Angle: 0,
|
||||
}
|
||||
} else {
|
||||
res = gocv.MinAreaRect(points)
|
||||
}
|
||||
return res
|
||||
}
|
||||
|
||||
func (d *DBPostProcess) boxesFromBitmap(pred gocv.Mat, mask gocv.Mat, ratioH float64, ratioW float64) [][][]int {
|
||||
height, width := mask.Rows(), mask.Cols()
|
||||
mask.MultiplyUChar(255)
|
||||
contours := gocv.FindContours(mask, gocv.RetrievalList, gocv.ChainApproxSimple)
|
||||
numContours := len(contours)
|
||||
if numContours > d.maxCandidates {
|
||||
numContours = d.maxCandidates
|
||||
}
|
||||
|
||||
boxes := make([][][]int, 0, numContours)
|
||||
for i := 0; i < numContours; i++ {
|
||||
contour := contours[i]
|
||||
boundingbox := gocv.MinAreaRect(contour)
|
||||
if boundingbox.Width < float32(d.minSize) || boundingbox.Height < float32(d.minSize) {
|
||||
continue
|
||||
}
|
||||
points := d.getMinBoxes(boundingbox)
|
||||
score := d.boxScoreFast(points, pred)
|
||||
if score < d.boxThresh {
|
||||
continue
|
||||
}
|
||||
|
||||
box := d.unClip(points)
|
||||
if box.Width < float32(d.minSize+2) || box.Height < float32(d.minSize+2) {
|
||||
continue
|
||||
}
|
||||
|
||||
cliparray := d.getMinBoxes(box)
|
||||
dstHeight, dstWidth := pred.Rows(), pred.Cols()
|
||||
intcliparray := make([][]int, 4)
|
||||
for i := 0; i < 4; i++ {
|
||||
p := []int{
|
||||
int(float64(clip(int(math.Round(
|
||||
float64(cliparray[i][0]/float32(width)*float32(dstWidth)))), 0, dstWidth)) / ratioW),
|
||||
int(float64(clip(int(math.Round(
|
||||
float64(cliparray[i][1]/float32(height)*float32(dstHeight)))), 0, dstHeight)) / ratioH),
|
||||
}
|
||||
intcliparray[i] = p
|
||||
}
|
||||
boxes = append(boxes, intcliparray)
|
||||
}
|
||||
return boxes
|
||||
}
|
||||
|
||||
func (d *DBPostProcess) orderPointsClockwise(box [][]int) [][]int {
|
||||
sort.Sort(xIntSortBy(box))
|
||||
leftmost := [][]int{box[0], box[1]}
|
||||
rightmost := [][]int{box[2], box[3]}
|
||||
|
||||
if leftmost[0][1] > leftmost[1][1] {
|
||||
leftmost[0], leftmost[1] = leftmost[1], leftmost[0]
|
||||
}
|
||||
|
||||
if rightmost[0][1] > rightmost[1][1] {
|
||||
rightmost[0], rightmost[1] = rightmost[1], rightmost[0]
|
||||
}
|
||||
|
||||
return [][]int{leftmost[0], rightmost[0], rightmost[1], leftmost[1]}
|
||||
}
|
||||
|
||||
func (d *DBPostProcess) filterTagDetRes(boxes [][][]int, oriH, oriW int) [][][]int {
|
||||
points := make([][][]int, 0, len(boxes))
|
||||
for i := 0; i < len(boxes); i++ {
|
||||
boxes[i] = d.orderPointsClockwise(boxes[i])
|
||||
for j := 0; j < len(boxes[i]); j++ {
|
||||
boxes[i][j][0] = clip(boxes[i][j][0], 0, oriW-1)
|
||||
boxes[i][j][1] = clip(boxes[i][j][1], 0, oriH-1)
|
||||
}
|
||||
}
|
||||
|
||||
for i := 0; i < len(boxes); i++ {
|
||||
rectW := int(math.Sqrt(math.Pow(float64(boxes[i][0][0]-boxes[i][1][0]), 2.0) +
|
||||
math.Pow(float64(boxes[i][0][1]-boxes[i][1][1]), 2.0)))
|
||||
rectH := int(math.Sqrt(math.Pow(float64(boxes[i][0][0]-boxes[i][3][0]), 2.0) +
|
||||
math.Pow(float64(boxes[i][0][1]-boxes[i][3][1]), 2.0)))
|
||||
if rectW <= 4 || rectH <= 4 {
|
||||
continue
|
||||
}
|
||||
points = append(points, boxes[i])
|
||||
}
|
||||
return points
|
||||
}
|
||||
|
||||
func (d *DBPostProcess) Run(output *paddle.ZeroCopyTensor, oriH, oriW int, ratioH, ratioW float64) [][][]int {
|
||||
v := output.Value().([][][][]float32)
|
||||
|
||||
shape := output.Shape()
|
||||
height, width := int(shape[2]), int(shape[3])
|
||||
|
||||
pred := gocv.NewMatWithSize(height, width, gocv.MatTypeCV32F)
|
||||
bitmap := gocv.NewMatWithSize(height, width, gocv.MatTypeCV8UC1)
|
||||
thresh := float32(d.thresh)
|
||||
for i := 0; i < height; i++ {
|
||||
for j := 0; j < width; j++ {
|
||||
pred.SetFloatAt(i, j, v[0][0][i][j])
|
||||
if v[0][0][i][j] > thresh {
|
||||
bitmap.SetUCharAt(i, j, 1)
|
||||
} else {
|
||||
bitmap.SetUCharAt(i, j, 0)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
mask := gocv.NewMat()
|
||||
kernel := gocv.GetStructuringElement(gocv.MorphRect, image.Point{2, 2})
|
||||
gocv.Dilate(bitmap, &mask, kernel)
|
||||
boxes := d.boxesFromBitmap(pred, mask, ratioH, ratioW)
|
||||
dtboxes := d.filterTagDetRes(boxes, oriH, oriW)
|
||||
return dtboxes
|
||||
}
|
|
@ -0,0 +1,169 @@
|
|||
package ocr
|
||||
|
||||
import (
|
||||
"image"
|
||||
"image/color"
|
||||
"math"
|
||||
|
||||
"github.com/LKKlein/gocv"
|
||||
)
|
||||
|
||||
func resizeByShape(img gocv.Mat, resizeShape []int) (gocv.Mat, int, int) {
|
||||
resizeH := resizeShape[0]
|
||||
resizeW := resizeShape[1]
|
||||
gocv.Resize(img, &img, image.Pt(resizeW, resizeH), 0, 0, gocv.InterpolationLinear)
|
||||
return img, resizeH, resizeW
|
||||
}
|
||||
|
||||
func resizeByMaxLen(img gocv.Mat, maxLen int) (gocv.Mat, int, int) {
|
||||
oriH := img.Rows()
|
||||
oriW := img.Cols()
|
||||
var resizeH, resizeW int = oriH, oriW
|
||||
|
||||
var ratio float64 = 1.0
|
||||
if resizeH > maxLen || resizeW > maxLen {
|
||||
if resizeH > resizeW {
|
||||
ratio = float64(maxLen) / float64(resizeH)
|
||||
} else {
|
||||
ratio = float64(maxLen) / float64(resizeW)
|
||||
}
|
||||
}
|
||||
|
||||
resizeH = int(float64(resizeH) * ratio)
|
||||
resizeW = int(float64(resizeW) * ratio)
|
||||
|
||||
if resizeH%32 == 0 {
|
||||
resizeH = resizeH
|
||||
} else if resizeH/32 <= 1 {
|
||||
resizeH = 32
|
||||
} else {
|
||||
resizeH = (resizeH/32 - 1) * 32
|
||||
}
|
||||
|
||||
if resizeW%32 == 0 {
|
||||
resizeW = resizeW
|
||||
} else if resizeW/32 <= 1 {
|
||||
resizeW = 32
|
||||
} else {
|
||||
resizeW = (resizeW/32 - 1) * 32
|
||||
}
|
||||
|
||||
if resizeW <= 0 || resizeH <= 0 {
|
||||
return gocv.NewMat(), 0, 0
|
||||
}
|
||||
|
||||
gocv.Resize(img, &img, image.Pt(resizeW, resizeH), 0, 0, gocv.InterpolationLinear)
|
||||
return img, resizeH, resizeW
|
||||
}
|
||||
|
||||
func normPermute(img gocv.Mat, mean []float32, std []float32, scaleFactor float32) []float32 {
|
||||
img.ConvertTo(&img, gocv.MatTypeCV32F)
|
||||
img.DivideFloat(scaleFactor)
|
||||
|
||||
c := gocv.Split(img)
|
||||
data := make([]float32, img.Rows()*img.Cols()*img.Channels())
|
||||
for i := 0; i < 3; i++ {
|
||||
c[i].SubtractFloat(mean[i])
|
||||
c[i].DivideFloat(std[i])
|
||||
defer c[i].Close()
|
||||
x, _ := c[i].DataPtrFloat32()
|
||||
copy(data[i*img.Rows()*img.Cols():], x)
|
||||
}
|
||||
return data
|
||||
}
|
||||
|
||||
type DetPreProcess interface {
|
||||
Run(gocv.Mat) ([]float32, int, int)
|
||||
}
|
||||
|
||||
// DBPreProcess holds the resize and normalization settings applied to an
// image before DB detection.
type DBPreProcess struct {
	// resizeType selects the resize rule in Run: 0 limits the longer side,
	// any other value resizes to imageShape.
	resizeType int
	// imageShape is the target shape passed to resizeByShape.
	imageShape []int
	// maxSideLen is the limit passed to resizeByMaxLen.
	maxSideLen int
	// mean and std are the per-channel normalization values.
	mean []float32
	std  []float32
	// scaleFactor divides the pixel values before normalization.
	scaleFactor float32
}
|
||||
|
||||
func NewDBProcess(shape []int, sideLen int) *DBPreProcess {
|
||||
db := &DBPreProcess{
|
||||
resizeType: 0,
|
||||
imageShape: shape,
|
||||
maxSideLen: sideLen,
|
||||
mean: []float32{0.485, 0.456, 0.406},
|
||||
std: []float32{0.229, 0.224, 0.225},
|
||||
scaleFactor: 255.0,
|
||||
}
|
||||
if len(shape) > 0 {
|
||||
db.resizeType = 1
|
||||
}
|
||||
if sideLen == 0 {
|
||||
db.maxSideLen = 2400
|
||||
}
|
||||
return db
|
||||
}
|
||||
|
||||
func (d *DBPreProcess) Run(img gocv.Mat) ([]float32, int, int) {
|
||||
var resizeH, resizeW int
|
||||
if d.resizeType == 0 {
|
||||
img, resizeH, resizeW = resizeByMaxLen(img, d.maxSideLen)
|
||||
} else {
|
||||
img, resizeH, resizeW = resizeByShape(img, d.imageShape)
|
||||
}
|
||||
|
||||
im := normPermute(img, d.mean, d.std, d.scaleFactor)
|
||||
return im, resizeH, resizeW
|
||||
}
|
||||
|
||||
func clsResize(img gocv.Mat, resizeShape []int) gocv.Mat {
|
||||
imgH, imgW := resizeShape[1], resizeShape[2]
|
||||
h, w := img.Rows(), img.Cols()
|
||||
ratio := float64(w) / float64(h)
|
||||
var resizeW int
|
||||
if math.Ceil(float64(imgH)*ratio) > float64(imgW) {
|
||||
resizeW = imgW
|
||||
} else {
|
||||
resizeW = int(math.Ceil(float64(imgH) * ratio))
|
||||
}
|
||||
gocv.Resize(img, &img, image.Pt(resizeW, imgH), 0, 0, gocv.InterpolationLinear)
|
||||
if resizeW < imgW {
|
||||
gocv.CopyMakeBorder(img, &img, 0, 0, 0, imgW-resizeW, gocv.BorderConstant, color.RGBA{0, 0, 0, 0})
|
||||
}
|
||||
return img
|
||||
}
|
||||
|
||||
func crnnPreprocess(img gocv.Mat, resizeShape []int, mean []float32, std []float32,
|
||||
scaleFactor float32, whRatio float64, charType string) []float32 {
|
||||
imgH := resizeShape[1]
|
||||
imgW := resizeShape[2]
|
||||
if charType == "ch" {
|
||||
imgW = int(32 * whRatio)
|
||||
}
|
||||
h, w := img.Rows(), img.Cols()
|
||||
ratio := float64(w) / float64(h)
|
||||
var resizeW int
|
||||
if math.Ceil(float64(imgH)*ratio) > float64(imgW) {
|
||||
resizeW = imgW
|
||||
} else {
|
||||
resizeW = int(math.Ceil(float64(imgH) * ratio))
|
||||
}
|
||||
gocv.Resize(img, &img, image.Pt(resizeW, imgH), 0, 0, gocv.InterpolationLinear)
|
||||
|
||||
img.ConvertTo(&img, gocv.MatTypeCV32F)
|
||||
img.DivideFloat(scaleFactor)
|
||||
img.SubtractScalar(gocv.NewScalar(float64(mean[0]), float64(mean[1]), float64(mean[2]), 0))
|
||||
img.DivideScalar(gocv.NewScalar(float64(std[0]), float64(std[1]), float64(std[2]), 0))
|
||||
|
||||
if resizeW < imgW {
|
||||
gocv.CopyMakeBorder(img, &img, 0, 0, 0, imgW-resizeW, gocv.BorderConstant, color.RGBA{0, 0, 0, 0})
|
||||
}
|
||||
|
||||
c := gocv.Split(img)
|
||||
data := make([]float32, img.Rows()*img.Cols()*img.Channels())
|
||||
for i := 0; i < 3; i++ {
|
||||
defer c[i].Close()
|
||||
x, _ := c[i].DataPtrFloat32()
|
||||
copy(data[i*img.Rows()*img.Cols():], x)
|
||||
}
|
||||
return data
|
||||
}
|
|
@ -0,0 +1,281 @@
|
|||
package ocr
|
||||
|
||||
import (
|
||||
"archive/tar"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/LKKlein/gocv"
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
// getString returns args[key] as a string, or dv when the key is absent.
// It panics if the stored value is not a string.
func getString(args map[string]interface{}, key string, dv string) string {
	raw, found := args[key]
	if !found {
		return dv
	}
	return raw.(string)
}
|
||||
|
||||
// getFloat64 returns args[key] as a float64, or dv when the key is absent.
//
// Besides float64, other float and integer values are accepted and
// converted, so a setting written as "2" instead of "2.0" no longer causes
// a panic. Values of any other type still panic, as before.
func getFloat64(args map[string]interface{}, key string, dv float64) float64 {
	raw, ok := args[key]
	if !ok {
		return dv
	}
	switch v := raw.(type) {
	case float64:
		return v
	case float32:
		return float64(v)
	case int:
		return float64(v)
	case int32:
		return float64(v)
	case int64:
		return float64(v)
	case uint64:
		return float64(v)
	}
	// Not a number: keep the original failure mode for misconfiguration.
	return raw.(float64)
}
|
||||
|
||||
// getInt returns args[key] as an int, or dv when the key is absent.
//
// Besides int, other integer types and float64 values without a fractional
// part are accepted and converted. Values of any other type, and float64
// values with a fractional part, still panic, as before.
func getInt(args map[string]interface{}, key string, dv int) int {
	raw, ok := args[key]
	if !ok {
		return dv
	}
	switch v := raw.(type) {
	case int:
		return v
	case int32:
		return int(v)
	case int64:
		return int(v)
	case uint64:
		return int(v)
	case float64:
		if v == float64(int(v)) {
			return int(v)
		}
	}
	// Not usable as an int: keep the original failure mode.
	return raw.(int)
}
|
||||
|
||||
// getBool returns args[key] as a bool, or dv when the key is absent.
// It panics if the stored value is not a bool.
func getBool(args map[string]interface{}, key string, dv bool) bool {
	raw, found := args[key]
	if !found {
		return dv
	}
	return raw.(bool)
}
|
||||
|
||||
func ReadImage(image_path string) gocv.Mat {
|
||||
img := gocv.IMRead(image_path, gocv.IMReadColor)
|
||||
if img.Empty() {
|
||||
log.Printf("Could not read image %s\n", image_path)
|
||||
os.Exit(1)
|
||||
}
|
||||
return img
|
||||
}
|
||||
|
||||
// clip limits value to the range [min, max]. The lower bound is checked
// first, so min is returned whenever value <= min.
func clip(value, min, max int) int {
	switch {
	case value <= min:
		return min
	case value >= max:
		return max
	default:
		return value
	}
}
|
||||
|
||||
// minf returns the smallest value in data. It panics on an empty slice.
func minf(data []float32) float32 {
	lowest := data[0]
	for i := 1; i < len(data); i++ {
		if data[i] < lowest {
			lowest = data[i]
		}
	}
	return lowest
}
|
||||
|
||||
// maxf returns the largest value in data. It panics on an empty slice.
func maxf(data []float32) float32 {
	highest := data[0]
	for i := 1; i < len(data); i++ {
		if data[i] > highest {
			highest = data[i]
		}
	}
	return highest
}
|
||||
|
||||
// mini returns the smallest value in data. It panics on an empty slice.
func mini(data []int) int {
	lowest := data[0]
	for i := 1; i < len(data); i++ {
		if data[i] < lowest {
			lowest = data[i]
		}
	}
	return lowest
}
|
||||
|
||||
// maxi returns the largest value in data. It panics on an empty slice.
func maxi(data []int) int {
	highest := data[0]
	for i := 1; i < len(data); i++ {
		if data[i] > highest {
			highest = data[i]
		}
	}
	return highest
}
|
||||
|
||||
// argmax returns the index and value of the largest element of arr. When
// the maximum occurs more than once, the first occurrence wins. It panics
// on an empty slice.
func argmax(arr []float32) (int, float32) {
	bestIdx, best := 0, arr[0]
	for i := 1; i < len(arr); i++ {
		if arr[i] > best {
			bestIdx, best = i, arr[i]
		}
	}
	return bestIdx, best
}
|
||||
|
||||
func checkModelExists(modelPath string) bool {
|
||||
if isPathExist(modelPath+"/model") && isPathExist(modelPath+"/params") {
|
||||
return true
|
||||
}
|
||||
if strings.HasPrefix(modelPath, "http://") ||
|
||||
strings.HasPrefix(modelPath, "ftp://") || strings.HasPrefix(modelPath, "https://") {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// httpStatusError reports a download that finished with a non-2xx status.
type httpStatusError struct {
	url    string
	status string
}

// Error implements the error interface.
func (e *httpStatusError) Error() string {
	return "download " + e.url + ": unexpected HTTP status " + e.status
}

// downloadFile fetches url with an HTTP GET and stores the body at filepath.
//
// An error is returned when the request fails, when the server answers with a
// non-2xx status (http.Get does not treat that as an error on its own), or
// when the file cannot be created, written, or closed. A partially written
// file is removed on failure, because callers use the file's existence to
// decide whether a download is still needed.
func downloadFile(filepath, url string) error {
	resp, err := http.Get(url)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		return &httpStatusError{url: url, status: resp.Status}
	}

	out, err := os.Create(filepath)
	if err != nil {
		return err
	}

	_, err = io.Copy(out, resp.Body)
	// A failed Close can mean buffered data never reached the disk.
	if cerr := out.Close(); err == nil {
		err = cerr
	}
	if err != nil {
		os.Remove(filepath)
		return err
	}

	log.Println("[download_file] from:", url, " to:", filepath)
	return nil
}
|
||||
|
||||
// isPathExist reports whether os.Stat succeeds for path. Any Stat error,
// not only "does not exist", yields false.
func isPathExist(path string) bool {
	_, err := os.Stat(path)
	return err == nil
}
|
||||
|
||||
func downloadModel(modelDir, modelPath string) (string, error) {
|
||||
if modelPath != "" && (strings.HasPrefix(modelPath, "http://") ||
|
||||
strings.HasPrefix(modelPath, "ftp://") || strings.HasPrefix(modelPath, "https://")) {
|
||||
if checkModelExists(modelDir) {
|
||||
return modelDir, nil
|
||||
}
|
||||
_, suffix := path.Split(modelPath)
|
||||
outPath := filepath.Join(modelDir, suffix)
|
||||
outDir := filepath.Dir(outPath)
|
||||
if !isPathExist(outDir) {
|
||||
os.MkdirAll(outDir, os.ModePerm)
|
||||
}
|
||||
|
||||
if !isPathExist(outPath) {
|
||||
err := downloadFile(outPath, modelPath)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
}
|
||||
|
||||
if strings.HasSuffix(outPath, ".tar") && !checkModelExists(modelDir) {
|
||||
unTar(modelDir, outPath)
|
||||
os.Remove(outPath)
|
||||
return modelDir, nil
|
||||
}
|
||||
return modelDir, nil
|
||||
}
|
||||
return modelPath, nil
|
||||
}
|
||||
|
||||
func unTar(dst, src string) (err error) {
|
||||
fr, err := os.Open(src)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer fr.Close()
|
||||
|
||||
tr := tar.NewReader(fr)
|
||||
for {
|
||||
hdr, err := tr.Next()
|
||||
|
||||
switch {
|
||||
case err == io.EOF:
|
||||
return nil
|
||||
case err != nil:
|
||||
return err
|
||||
case hdr == nil:
|
||||
continue
|
||||
}
|
||||
|
||||
var dstFileDir string
|
||||
if strings.Contains(hdr.Name, "model") {
|
||||
dstFileDir = filepath.Join(dst, "model")
|
||||
} else if strings.Contains(hdr.Name, "params") {
|
||||
dstFileDir = filepath.Join(dst, "params")
|
||||
}
|
||||
|
||||
switch hdr.Typeflag {
|
||||
case tar.TypeDir:
|
||||
continue
|
||||
case tar.TypeReg:
|
||||
file, err := os.OpenFile(dstFileDir, os.O_CREATE|os.O_RDWR, os.FileMode(hdr.Mode))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
_, err2 := io.Copy(file, tr)
|
||||
if err2 != nil {
|
||||
return err2
|
||||
}
|
||||
file.Close()
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func readLines2StringSlice(filepath string) []string {
|
||||
if strings.HasPrefix(filepath, "http://") || strings.HasPrefix(filepath, "https://") {
|
||||
home, _ := os.UserHomeDir()
|
||||
dir := home + "/.paddleocr/rec/"
|
||||
_, suffix := path.Split(filepath)
|
||||
f := dir + suffix
|
||||
if !isPathExist(f) {
|
||||
err := downloadFile(f, filepath)
|
||||
if err != nil {
|
||||
log.Println("download ppocr key file error! You can specify your local dict path by conf.yaml.")
|
||||
return nil
|
||||
}
|
||||
}
|
||||
filepath = f
|
||||
}
|
||||
content, err := ioutil.ReadFile(filepath)
|
||||
if err != nil {
|
||||
log.Println("read ppocr key file error!")
|
||||
return nil
|
||||
}
|
||||
lines := strings.Split(string(content), "\n")
|
||||
return lines
|
||||
}
|
||||
|
||||
func ReadYaml(yamlPath string) (map[string]interface{}, error) {
|
||||
data, err := ioutil.ReadFile(yamlPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var body interface{}
|
||||
if err := yaml.Unmarshal(data, &body); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
body = convertYaml2Map(body)
|
||||
return body.(map[string]interface{}), nil
|
||||
}
|
||||
|
||||
// convertYaml2Map recursively rewrites map[interface{}]interface{} values
// into map[string]interface{}. Slices are converted element by element in
// place; every other value is returned unchanged.
// A non-string map key makes the key assertion panic.
func convertYaml2Map(i interface{}) interface{} {
	if seq, ok := i.([]interface{}); ok {
		for idx := range seq {
			seq[idx] = convertYaml2Map(seq[idx])
		}
		return i
	}

	src, ok := i.(map[interface{}]interface{})
	if !ok {
		return i
	}
	dst := make(map[string]interface{})
	for key, val := range src {
		dst[key.(string)] = convertYaml2Map(val)
	}
	return dst
}
|
|
@ -0,0 +1,27 @@
|
|||
package paddle
|
||||
|
||||
// #cgo CFLAGS: -I../paddle_c/include
|
||||
// #cgo LDFLAGS: -lpaddle_fluid_c
|
||||
// #include <stdbool.h>
|
||||
import "C"
|
||||
import "fmt"
|
||||
|
||||
func ConvertCBooleanToGo(b C.bool) bool {
|
||||
var c_false C.bool
|
||||
if b != c_false {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// numel returns the product of all entries of shape (1 for an empty shape).
func numel(shape []int32) int32 {
	total := int32(1)
	for i := 0; i < len(shape); i++ {
		total *= shape[i]
	}
	return total
}
|
||||
|
||||
// bug formats its arguments with fmt.Sprintf and returns the result as an
// error whose message is prefixed with "Bug ".
func bug(format string, args ...interface{}) error {
	detail := fmt.Sprintf(format, args...)
	return fmt.Errorf("Bug %v", detail)
}
|
|
@ -0,0 +1,181 @@
|
|||
package paddle
|
||||
|
||||
// #include <stdbool.h>
|
||||
// #include <stdlib.h>
|
||||
// #include <paddle_c_api.h>
|
||||
import "C"
|
||||
|
||||
import (
|
||||
"runtime"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// Precision is the Go counterpart of the C Precision enum; it is passed to
// EnableTensorRtEngine.
type Precision C.Precision
|
||||
|
||||
// Values of Precision, taken from the C enumerators of the same meaning.
const (
|
||||
Precision_FLOAT32 Precision = C.kFloat32
|
||||
Precision_INT8 Precision = C.kInt8
|
||||
Precision_HALF Precision = C.kHalf
|
||||
)
|
||||
|
||||
// AnalysisConfig wraps a C PD_AnalysisConfig handle.
type AnalysisConfig struct {
|
||||
// c is the underlying C object that the methods forward to.
c *C.PD_AnalysisConfig
|
||||
}
|
||||
|
||||
func NewAnalysisConfig() *AnalysisConfig {
|
||||
c_config := C.PD_NewAnalysisConfig()
|
||||
config := &AnalysisConfig{c: c_config}
|
||||
runtime.SetFinalizer(config, (*AnalysisConfig).finalize)
|
||||
return config
|
||||
}
|
||||
|
||||
func (config *AnalysisConfig) finalize() {
|
||||
C.PD_DeleteAnalysisConfig(config.c)
|
||||
}
|
||||
|
||||
func (config *AnalysisConfig) SetModel(model, params string) {
|
||||
c_model := C.CString(model)
|
||||
defer C.free(unsafe.Pointer(c_model))
|
||||
var c_params *C.char
|
||||
if params == "" {
|
||||
c_params = nil
|
||||
} else {
|
||||
c_params = C.CString(params)
|
||||
defer C.free(unsafe.Pointer(c_params))
|
||||
}
|
||||
|
||||
C.PD_SetModel(config.c, c_model, c_params)
|
||||
}
|
||||
|
||||
func (config *AnalysisConfig) ModelDir() string {
|
||||
return C.GoString(C.PD_ModelDir(config.c))
|
||||
}
|
||||
|
||||
func (config *AnalysisConfig) ProgFile() string {
|
||||
return C.GoString(C.PD_ProgFile(config.c))
|
||||
}
|
||||
|
||||
func (config *AnalysisConfig) ParamsFile() string {
|
||||
return C.GoString(C.PD_ParamsFile(config.c))
|
||||
}
|
||||
|
||||
func (config *AnalysisConfig) EnableUseGpu(memory_pool_init_size_mb int, device_id int) {
|
||||
C.PD_EnableUseGpu(config.c, C.int(memory_pool_init_size_mb), C.int(device_id))
|
||||
}
|
||||
|
||||
func (config *AnalysisConfig) DisableGpu() {
|
||||
C.PD_DisableGpu(config.c)
|
||||
}
|
||||
|
||||
func (config *AnalysisConfig) UseGpu() bool {
|
||||
return ConvertCBooleanToGo(C.PD_UseGpu(config.c))
|
||||
}
|
||||
|
||||
func (config *AnalysisConfig) GpuDeviceId() int {
|
||||
return int(C.PD_GpuDeviceId(config.c))
|
||||
}
|
||||
|
||||
func (config *AnalysisConfig) MemoryPoolInitSizeMb() int {
|
||||
return int(C.PD_MemoryPoolInitSizeMb(config.c))
|
||||
}
|
||||
|
||||
func (config *AnalysisConfig) EnableCudnn() {
|
||||
C.PD_EnableCUDNN(config.c)
|
||||
}
|
||||
|
||||
func (config *AnalysisConfig) CudnnEnabled() bool {
|
||||
return ConvertCBooleanToGo(C.PD_CudnnEnabled(config.c))
|
||||
}
|
||||
|
||||
func (config *AnalysisConfig) SwitchIrOptim(x bool) {
|
||||
C.PD_SwitchIrOptim(config.c, C.bool(x))
|
||||
}
|
||||
|
||||
func (config *AnalysisConfig) IrOptim() bool {
|
||||
return ConvertCBooleanToGo(C.PD_IrOptim(config.c))
|
||||
}
|
||||
|
||||
func (config *AnalysisConfig) SwitchUseFeedFetchOps(x bool) {
|
||||
C.PD_SwitchUseFeedFetchOps(config.c, C.bool(x))
|
||||
}
|
||||
|
||||
func (config *AnalysisConfig) UseFeedFetchOpsEnabled() bool {
|
||||
return ConvertCBooleanToGo(C.PD_UseFeedFetchOpsEnabled(config.c))
|
||||
}
|
||||
|
||||
func (config *AnalysisConfig) SwitchSpecifyInputNames(x bool) {
|
||||
C.PD_SwitchSpecifyInputNames(config.c, C.bool(x))
|
||||
}
|
||||
|
||||
func (config *AnalysisConfig) SpecifyInputName() bool {
|
||||
return ConvertCBooleanToGo(C.PD_SpecifyInputName(config.c))
|
||||
}
|
||||
|
||||
func (config *AnalysisConfig) EnableTensorRtEngine(workspace_size int, max_batch_size int, min_subgraph_size int, precision Precision, use_static bool, use_calib_mode bool) {
|
||||
C.PD_EnableTensorRtEngine(config.c, C.int(workspace_size), C.int(max_batch_size), C.int(min_subgraph_size), C.Precision(precision), C.bool(use_static), C.bool(use_calib_mode))
|
||||
}
|
||||
|
||||
func (config *AnalysisConfig) TensorrtEngineEnabled() bool {
|
||||
return ConvertCBooleanToGo(C.PD_TensorrtEngineEnabled(config.c))
|
||||
}
|
||||
|
||||
func (config *AnalysisConfig) SwitchIrDebug(x bool) {
|
||||
C.PD_SwitchIrDebug(config.c, C.bool(x))
|
||||
}
|
||||
|
||||
func (config *AnalysisConfig) EnableMkldnn() {
|
||||
C.PD_EnableMKLDNN(config.c)
|
||||
}
|
||||
|
||||
func (config *AnalysisConfig) SetCpuMathLibraryNumThreads(n int) {
|
||||
C.PD_SetCpuMathLibraryNumThreads(config.c, C.int(n))
|
||||
}
|
||||
|
||||
func (config *AnalysisConfig) CpuMathLibraryNumThreads() int {
|
||||
return int(C.PD_CpuMathLibraryNumThreads(config.c))
|
||||
}
|
||||
|
||||
func (config *AnalysisConfig) EnableMkldnnQuantizer() {
|
||||
C.PD_EnableMkldnnQuantizer(config.c)
|
||||
}
|
||||
|
||||
func (config *AnalysisConfig) MkldnnQuantizerEnabled() bool {
|
||||
return ConvertCBooleanToGo(C.PD_MkldnnQuantizerEnabled(config.c))
|
||||
}
|
||||
|
||||
// SetModelBuffer
|
||||
// ModelFromMemory
|
||||
|
||||
func (config *AnalysisConfig) EnableMemoryOptim() {
|
||||
C.PD_EnableMemoryOptim(config.c)
|
||||
}
|
||||
|
||||
func (config *AnalysisConfig) MemoryOptimEnabled() bool {
|
||||
return ConvertCBooleanToGo(C.PD_MemoryOptimEnabled(config.c))
|
||||
}
|
||||
|
||||
func (config *AnalysisConfig) EnableProfile() {
|
||||
C.PD_EnableProfile(config.c)
|
||||
}
|
||||
|
||||
func (config *AnalysisConfig) ProfileEnabled() bool {
|
||||
return ConvertCBooleanToGo(C.PD_ProfileEnabled(config.c))
|
||||
}
|
||||
|
||||
func (config *AnalysisConfig) DisableGlogInfo() {
|
||||
C.PD_DisableGlogInfo(config.c)
|
||||
}
|
||||
|
||||
func (config *AnalysisConfig) DeletePass(pass string) {
|
||||
c_pass := C.CString(pass)
|
||||
defer C.free(unsafe.Pointer(c_pass))
|
||||
C.PD_DeletePass(config.c, c_pass)
|
||||
}
|
||||
|
||||
func (config *AnalysisConfig) SetInValid() {
|
||||
C.PD_SetInValid(config.c)
|
||||
}
|
||||
|
||||
func (config *AnalysisConfig) IsValid() bool {
|
||||
return ConvertCBooleanToGo(C.PD_IsValid(config.c))
|
||||
}
|
|
@ -0,0 +1,101 @@
|
|||
package paddle
|
||||
|
||||
// #include <stdbool.h>
|
||||
// #include "paddle_c_api.h"
|
||||
import "C"
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"runtime"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// Predictor wraps a C PD_Predictor handle.
type Predictor struct {
|
||||
// c is the underlying C object that the methods forward to.
c *C.PD_Predictor
|
||||
}
|
||||
|
||||
func NewPredictor(config *AnalysisConfig) *Predictor {
|
||||
c_predictor := C.PD_NewPredictor((*config).c)
|
||||
predictor := &Predictor{c: c_predictor}
|
||||
runtime.SetFinalizer(predictor, (*Predictor).finalize)
|
||||
return predictor
|
||||
}
|
||||
|
||||
func (predictor *Predictor) finalize() {
|
||||
C.PD_DeletePredictor(predictor.c)
|
||||
}
|
||||
|
||||
func DeletePredictor(predictor *Predictor) {
|
||||
C.PD_DeletePredictor(predictor.c)
|
||||
}
|
||||
|
||||
func (predictor *Predictor) GetInputNum() int {
|
||||
return int(C.PD_GetInputNum(predictor.c))
|
||||
}
|
||||
|
||||
func (predictor *Predictor) GetOutputNum() int {
|
||||
return int(C.PD_GetOutputNum(predictor.c))
|
||||
}
|
||||
|
||||
func (predictor *Predictor) GetInputName(n int) string {
|
||||
return C.GoString(C.PD_GetInputName(predictor.c, C.int(n)))
|
||||
}
|
||||
|
||||
func (predictor *Predictor) GetOutputName(n int) string {
|
||||
return C.GoString(C.PD_GetOutputName(predictor.c, C.int(n)))
|
||||
}
|
||||
|
||||
func (predictor *Predictor) GetInputTensors() [](*ZeroCopyTensor) {
|
||||
var result [](*ZeroCopyTensor)
|
||||
for i := 0; i < predictor.GetInputNum(); i++ {
|
||||
tensor := NewZeroCopyTensor()
|
||||
tensor.c.name = C.PD_GetInputName(predictor.c, C.int(i))
|
||||
result = append(result, tensor)
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func (predictor *Predictor) GetOutputTensors() [](*ZeroCopyTensor) {
|
||||
var result [](*ZeroCopyTensor)
|
||||
for i := 0; i < predictor.GetOutputNum(); i++ {
|
||||
tensor := NewZeroCopyTensor()
|
||||
tensor.c.name = C.PD_GetOutputName(predictor.c, C.int(i))
|
||||
result = append(result, tensor)
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func (predictor *Predictor) GetInputNames() []string {
|
||||
names := make([]string, predictor.GetInputNum())
|
||||
for i := 0; i < len(names); i++ {
|
||||
names[i] = predictor.GetInputName(i)
|
||||
}
|
||||
return names
|
||||
}
|
||||
|
||||
func (predictor *Predictor) GetOutputNames() []string {
|
||||
names := make([]string, predictor.GetOutputNum())
|
||||
for i := 0; i < len(names); i++ {
|
||||
names[i] = predictor.GetOutputName(i)
|
||||
}
|
||||
return names
|
||||
}
|
||||
|
||||
func (predictor *Predictor) SetZeroCopyInput(tensor *ZeroCopyTensor) {
|
||||
C.PD_SetZeroCopyInput(predictor.c, tensor.c)
|
||||
}
|
||||
|
||||
func (predictor *Predictor) GetZeroCopyOutput(tensor *ZeroCopyTensor) {
|
||||
C.PD_GetZeroCopyOutput(predictor.c, tensor.c)
|
||||
tensor.name = C.GoString(tensor.c.name)
|
||||
var shape []int32
|
||||
shape_hdr := (*reflect.SliceHeader)(unsafe.Pointer(&shape))
|
||||
shape_hdr.Data = uintptr(unsafe.Pointer(tensor.c.shape.data))
|
||||
shape_hdr.Len = int(tensor.c.shape.length / C.sizeof_int)
|
||||
shape_hdr.Cap = int(tensor.c.shape.length / C.sizeof_int)
|
||||
tensor.Reshape(shape)
|
||||
}
|
||||
|
||||
func (predictor *Predictor) ZeroCopyRun() {
|
||||
C.PD_ZeroCopyRun(predictor.c)
|
||||
}
|
|
@ -0,0 +1,250 @@
|
|||
package paddle
|
||||
|
||||
// #include <stdbool.h>
|
||||
// #include <stdlib.h>
|
||||
// #include <string.h>
|
||||
// #include <paddle_c_api.h>
|
||||
import "C"
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"runtime"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// PaddleDType is the Go counterpart of the C PD_DataType enum and identifies
// the element type of a tensor.
type PaddleDType C.PD_DataType
|
||||
|
||||
// Values of PaddleDType, taken from the C enumerators of the same name.
const (
|
||||
FLOAT32 PaddleDType = C.PD_FLOAT32
|
||||
INT32 PaddleDType = C.PD_INT32
|
||||
INT64 PaddleDType = C.PD_INT64
|
||||
UINT8 PaddleDType = C.PD_UINT8
|
||||
UNKDTYPE PaddleDType = C.PD_UNKDTYPE
|
||||
)
|
||||
|
||||
// types pairs each supported Go element type with its PaddleDType. It is the
// lookup table used by typeOfDataType and shapeAndTypeOf.
var types = []struct {
|
||||
gotype reflect.Type
|
||||
dtype PaddleDType
|
||||
}{
|
||||
{reflect.TypeOf(float32(0)), FLOAT32},
|
||||
{reflect.TypeOf(int32(0)), INT32},
|
||||
{reflect.TypeOf(int64(0)), INT64},
|
||||
{reflect.TypeOf(uint8(0)), UINT8},
|
||||
}
|
||||
|
||||
func typeOfDataType(dtype PaddleDType) reflect.Type {
|
||||
var ret reflect.Type
|
||||
for _, t := range types {
|
||||
if t.dtype == dtype {
|
||||
ret = t.gotype
|
||||
}
|
||||
}
|
||||
return ret
|
||||
}
|
||||
|
||||
func sizeofDataType(dtype PaddleDType) int32 {
|
||||
switch dtype {
|
||||
case UINT8:
|
||||
return int32(C.sizeof_uchar)
|
||||
case INT32:
|
||||
return int32(C.sizeof_int)
|
||||
case INT64:
|
||||
return int32(C.sizeof_longlong)
|
||||
case FLOAT32:
|
||||
return int32(C.sizeof_float)
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
func shapeAndTypeOf(val reflect.Value) (shape []int32, dt PaddleDType) {
|
||||
gotype := val.Type()
|
||||
for gotype.Kind() == reflect.Array || gotype.Kind() == reflect.Slice {
|
||||
shape = append(shape, int32(val.Len()))
|
||||
if val.Len() > 0 {
|
||||
val = val.Index(0)
|
||||
}
|
||||
gotype = gotype.Elem()
|
||||
}
|
||||
|
||||
for _, t := range types {
|
||||
if gotype.Kind() == t.gotype.Kind() {
|
||||
return shape, PaddleDType(t.dtype)
|
||||
}
|
||||
}
|
||||
return shape, dt
|
||||
}
|
||||
|
||||
// ZeroCopyTensor wraps a C PD_ZeroCopyTensor together with Go-side copies of
// its name and shape.
type ZeroCopyTensor struct {
|
||||
// c is the underlying C tensor struct.
c *C.PD_ZeroCopyTensor
|
||||
// name is set by Rename and Predictor.GetZeroCopyOutput; Name() reads
// c.name instead.
name string
|
||||
// shape is the Go copy of the shape, set by Reshape and returned by Shape.
shape []int32
|
||||
}
|
||||
|
||||
func NewZeroCopyTensor() *ZeroCopyTensor {
|
||||
c_tensor := C.PD_NewZeroCopyTensor()
|
||||
|
||||
tensor := &ZeroCopyTensor{c: c_tensor}
|
||||
runtime.SetFinalizer(tensor, (*ZeroCopyTensor).finalize)
|
||||
return tensor
|
||||
}
|
||||
|
||||
func (tensor *ZeroCopyTensor) finalize() {
|
||||
C.PD_DeleteZeroCopyTensor(tensor.c)
|
||||
}
|
||||
|
||||
func (tensor *ZeroCopyTensor) Shape() []int32 {
|
||||
return tensor.shape
|
||||
}
|
||||
|
||||
func (tensor *ZeroCopyTensor) Name() string {
|
||||
return C.GoString(tensor.c.name)
|
||||
}
|
||||
|
||||
// Rename stores name in the Go-side name field.
//
// NOTE(review): the second statement assigns tensor.c.name to itself, so the
// C-side name (the one Name() returns and the C API sees) is NOT changed.
// Writing a new C string here needs a decision on who frees the old and new
// pointers; confirm ownership against the C API before fixing.
func (tensor *ZeroCopyTensor) Rename(name string) {
|
||||
tensor.name = name
|
||||
tensor.c.name = (*C.char)(unsafe.Pointer(tensor.c.name))
|
||||
}
|
||||
|
||||
func (tensor *ZeroCopyTensor) Reshape(shape []int32) {
|
||||
tensor.shape = make([]int32, len(shape))
|
||||
copy(tensor.shape, shape)
|
||||
length := C.sizeof_int * C.size_t(len(shape))
|
||||
if tensor.c.shape.capacity < C.size_t(length) {
|
||||
if tensor.c.shape.capacity != C.size_t(0) {
|
||||
C.free(tensor.c.shape.data)
|
||||
}
|
||||
tensor.c.shape.data = C.malloc(length)
|
||||
tensor.c.shape.capacity = length
|
||||
}
|
||||
tensor.c.shape.length = length
|
||||
C.memcpy(tensor.c.shape.data, unsafe.Pointer(&shape[0]), length)
|
||||
}
|
||||
|
||||
func (tensor *ZeroCopyTensor) DataType() PaddleDType {
|
||||
return PaddleDType(tensor.c.dtype)
|
||||
}
|
||||
|
||||
func (tensor *ZeroCopyTensor) SetValue(value interface{}) {
|
||||
val := reflect.ValueOf(value)
|
||||
shape, dtype := shapeAndTypeOf(val)
|
||||
num := numel(shape)
|
||||
length := C.size_t(sizeofDataType(dtype) * num)
|
||||
if tensor.c.data.capacity < length {
|
||||
if tensor.c.data.capacity != C.size_t(0) {
|
||||
C.free(tensor.c.data.data)
|
||||
}
|
||||
tensor.c.data.data = C.malloc(length)
|
||||
tensor.c.data.capacity = length
|
||||
}
|
||||
tensor.c.data.length = length
|
||||
|
||||
switch dtype {
|
||||
case PaddleDType(UINT8):
|
||||
data := val.Interface().([]uint8)
|
||||
C.memcpy(tensor.c.data.data, unsafe.Pointer(&data[0]), length)
|
||||
case PaddleDType(INT32):
|
||||
data := val.Interface().([]int32)
|
||||
C.memcpy(tensor.c.data.data, unsafe.Pointer(&data[0]), length)
|
||||
case PaddleDType(INT64):
|
||||
data := val.Interface().([]int64)
|
||||
C.memcpy(tensor.c.data.data, unsafe.Pointer(&data[0]), length)
|
||||
case PaddleDType(FLOAT32):
|
||||
data := val.Interface().([]float32)
|
||||
C.memcpy(tensor.c.data.data, unsafe.Pointer(&data[0]), length)
|
||||
}
|
||||
tensor.c.dtype = C.PD_DataType(dtype)
|
||||
}
|
||||
|
||||
func (tensor *ZeroCopyTensor) tensorData() []byte {
|
||||
cbytes := tensor.c.data.data
|
||||
length := tensor.c.data.length
|
||||
var slice []byte
|
||||
if unsafe.Sizeof(unsafe.Pointer(nil)) == 8 {
|
||||
slice = (*[1<<50 - 1]byte)(unsafe.Pointer(cbytes))[:length:length]
|
||||
} else {
|
||||
slice = (*[1 << 30]byte)(unsafe.Pointer(cbytes))[:length:length]
|
||||
}
|
||||
return slice
|
||||
}
|
||||
|
||||
func (tensor *ZeroCopyTensor) Value() interface{} {
|
||||
t := typeOfDataType(PaddleDType(tensor.c.dtype))
|
||||
data := tensor.tensorData()
|
||||
return decodeTensor(data, tensor.Shape(), t).Interface()
|
||||
}
|
||||
|
||||
// sliceHeader is a local stand-in for reflect.SliceHeader.
// It isn't safe to use reflect.SliceHeader as it uses a uintptr for Data and
|
||||
// this is not inspected by the garbage collector
|
||||
type sliceHeader struct {
|
||||
// Data points to the first element of the slice.
Data unsafe.Pointer
|
||||
// Len is the number of elements.
Len int
|
||||
// Cap is the capacity in elements.
Cap int
|
||||
}
|
||||
|
||||
// decodeTensor copies raw into a newly allocated flat slice of n elements of
// type t, where n = numel(shape), and returns it shaped according to shape:
//   - empty shape: the single element itself;
//   - one dimension: the flat slice;
//   - more dimensions: nested slices whose headers point into the level
//     below, so the element data is copied only once.
func decodeTensor(raw []byte, shape []int32, t reflect.Type) reflect.Value {
|
||||
// Create a 1-dimensional slice of the base large enough for the data and
|
||||
// copy the data in.
|
||||
n := int(numel(shape))
|
||||
|
||||
// l is the byte length of the flat slice.
l := n * int(t.Size())
|
||||
typ := reflect.SliceOf(t)
|
||||
slice := reflect.MakeSlice(typ, n, n)
|
||||
// View the new slice's backing array as bytes so raw can be copied into it.
baseBytes := *(*[]byte)(unsafe.Pointer(&sliceHeader{
|
||||
Data: unsafe.Pointer(slice.Pointer()),
|
||||
Len: l,
|
||||
Cap: l,
|
||||
}))
|
||||
copy(baseBytes, raw)
|
||||
|
||||
if len(shape) == 0 {
|
||||
// for n
|
||||
return slice.Index(0)
|
||||
}
|
||||
if len(shape) == 1 {
|
||||
// for {}
|
||||
return slice
|
||||
}
|
||||
// for {{} {}} {{} {}} {{} {}}
|
||||
// With a zero-sized dimension n is 0; fall back to the element count of the
// shape without its last dimension.
if n == 0 {
|
||||
n = int(numel(shape[:len(shape)-1]))
|
||||
}
|
||||
// Build the nesting from the innermost dimension outwards.
for i := len(shape) - 2; i >= 0; i-- {
|
||||
underlyingSize := typ.Elem().Size()
|
||||
typ = reflect.SliceOf(typ)
|
||||
subsliceLen := int(shape[i+1])
|
||||
if subsliceLen != 0 {
|
||||
n = n / subsliceLen
|
||||
}
|
||||
data := unsafe.Pointer(slice.Pointer())
|
||||
nextSlice := reflect.MakeSlice(typ, n, n)
|
||||
|
||||
for j := 0; j < n; j++ {
|
||||
// This is equivalent to nSlice[j] = slice[j*subsliceLen: (j+1)*subsliceLen]
|
||||
setSliceInSlice(nextSlice, j, sliceHeader{
|
||||
Data: unsafe.Pointer(uintptr(data) + (uintptr(j*subsliceLen) * underlyingSize)),
|
||||
Len: subsliceLen,
|
||||
Cap: subsliceLen,
|
||||
})
|
||||
}
|
||||
|
||||
slice = nextSlice
|
||||
}
|
||||
return slice
|
||||
}
|
||||
|
||||
// setSliceInSlice sets slice[index] = content.
// slice must be a slice of slices: the header stored at position index of
// its backing array is overwritten with content.
|
||||
func setSliceInSlice(slice reflect.Value, index int, content sliceHeader) {
|
||||
const sliceSize = unsafe.Sizeof(sliceHeader{})
|
||||
// We must cast slice.Pointer to uintptr & back again to avoid GC issues.
|
||||
// See https://github.com/google/go-cmp/issues/167#issuecomment-546093202
|
||||
*(*sliceHeader)(unsafe.Pointer(uintptr(unsafe.Pointer(slice.Pointer())) + (uintptr(index) * sliceSize))) = content
|
||||
}
|
||||
|
||||
// Lod returns the tensor's C lod buffer viewed as a []uint, with one element
// per sizeof(size_t) bytes of the buffer.
//
// NOTE(review): the returned slice aliases the C buffer rather than copying
// it, so it presumably becomes invalid once the tensor is finalized; confirm
// before retaining it. The view also assumes Go's uint has the same size as
// C's size_t.
func (tensor *ZeroCopyTensor) Lod() []uint {
|
||||
var val []uint
|
||||
valHdr := (*reflect.SliceHeader)(unsafe.Pointer(&val))
|
||||
valHdr.Data = uintptr(unsafe.Pointer(tensor.c.lod.data))
|
||||
valHdr.Len = int(tensor.c.lod.length / C.sizeof_size_t)
|
||||
valHdr.Cap = int(tensor.c.lod.length / C.sizeof_size_t)
|
||||
return val
|
||||
}
|
|
@ -0,0 +1,290 @@
|
|||
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#if defined(_WIN32)
|
||||
#ifdef PADDLE_ON_INFERENCE
|
||||
#define PADDLE_CAPI_EXPORT __declspec(dllexport)
|
||||
#else
|
||||
#define PADDLE_CAPI_EXPORT __declspec(dllimport)
|
||||
#endif // PADDLE_ON_INFERENCE
|
||||
#else
|
||||
#define PADDLE_CAPI_EXPORT __attribute__((visibility("default")))
|
||||
#endif // _WIN32
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
enum PD_DataType { PD_FLOAT32, PD_INT32, PD_INT64, PD_UINT8, PD_UNKDTYPE };
|
||||
|
||||
typedef enum PD_DataType PD_DataType;
|
||||
|
||||
typedef struct PD_PaddleBuf PD_PaddleBuf;
|
||||
typedef struct PD_AnalysisConfig PD_AnalysisConfig;
|
||||
typedef struct PD_Predictor PD_Predictor;
|
||||
|
||||
// PD_Buffer describes a raw memory block.
typedef struct PD_Buffer {
|
||||
// Start of the block.
void* data;
|
||||
// Bytes in use (the Go bindings treat length and capacity as byte counts).
size_t length;
|
||||
// Bytes allocated.
size_t capacity;
|
||||
} PD_Buffer;
|
||||
|
||||
// PD_ZeroCopyTensor is the tensor exchanged with PD_SetZeroCopyInput and
// PD_GetZeroCopyOutput.
typedef struct PD_ZeroCopyTensor {
|
||||
// Element data.
PD_Buffer data;
|
||||
// Dimensions (the Go bindings read and write them as C ints).
PD_Buffer shape;
|
||||
// LoD information (the Go bindings read it as size_t values).
PD_Buffer lod;
|
||||
// Element type of data.
PD_DataType dtype;
|
||||
// Tensor name.
char* name;
|
||||
} PD_ZeroCopyTensor;
|
||||
|
||||
PADDLE_CAPI_EXPORT extern PD_ZeroCopyTensor* PD_NewZeroCopyTensor();
|
||||
PADDLE_CAPI_EXPORT extern void PD_DeleteZeroCopyTensor(PD_ZeroCopyTensor*);
|
||||
PADDLE_CAPI_EXPORT extern void PD_InitZeroCopyTensor(PD_ZeroCopyTensor*);
|
||||
PADDLE_CAPI_EXPORT extern void PD_DestroyZeroCopyTensor(PD_ZeroCopyTensor*);
|
||||
PADDLE_CAPI_EXPORT extern void PD_DeleteZeroCopyTensor(PD_ZeroCopyTensor*);
|
||||
|
||||
// PD_ZeroCopyData is the input/output record of PD_PredictorZeroCopyRun.
typedef struct PD_ZeroCopyData {
|
||||
// Tensor name.
char* name;
|
||||
// Element data.
void* data;
|
||||
// Element type of data.
PD_DataType dtype;
|
||||
// Dimensions.
int* shape;
|
||||
// Presumably the number of entries in shape -- TODO confirm.
int shape_size;
|
||||
} PD_ZeroCopyData;
|
||||
// InTensorShape pairs a tensor name with a shape.
typedef struct InTensorShape {
|
||||
// Tensor name.
char* name;
|
||||
// Dimensions.
int* tensor_shape;
|
||||
// Presumably the number of entries in tensor_shape -- TODO confirm.
int shape_size;
|
||||
} InTensorShape;
|
||||
|
||||
PADDLE_CAPI_EXPORT extern PD_PaddleBuf* PD_NewPaddleBuf();
|
||||
|
||||
PADDLE_CAPI_EXPORT extern void PD_DeletePaddleBuf(PD_PaddleBuf* buf);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern void PD_PaddleBufResize(PD_PaddleBuf* buf,
|
||||
size_t length);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern void PD_PaddleBufReset(PD_PaddleBuf* buf, void* data,
|
||||
size_t length);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern bool PD_PaddleBufEmpty(PD_PaddleBuf* buf);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern void* PD_PaddleBufData(PD_PaddleBuf* buf);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern size_t PD_PaddleBufLength(PD_PaddleBuf* buf);
|
||||
|
||||
// PaddleTensor
|
||||
typedef struct PD_Tensor PD_Tensor;
|
||||
|
||||
PADDLE_CAPI_EXPORT extern PD_Tensor* PD_NewPaddleTensor();
|
||||
|
||||
PADDLE_CAPI_EXPORT extern void PD_DeletePaddleTensor(PD_Tensor* tensor);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern void PD_SetPaddleTensorName(PD_Tensor* tensor,
|
||||
char* name);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern void PD_SetPaddleTensorDType(PD_Tensor* tensor,
|
||||
PD_DataType dtype);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern void PD_SetPaddleTensorData(PD_Tensor* tensor,
|
||||
PD_PaddleBuf* buf);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern void PD_SetPaddleTensorShape(PD_Tensor* tensor,
|
||||
int* shape, int size);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern const char* PD_GetPaddleTensorName(
|
||||
const PD_Tensor* tensor);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern PD_DataType PD_GetPaddleTensorDType(
|
||||
const PD_Tensor* tensor);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern PD_PaddleBuf* PD_GetPaddleTensorData(
|
||||
const PD_Tensor* tensor);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern const int* PD_GetPaddleTensorShape(
|
||||
const PD_Tensor* tensor, int* size);
|
||||
|
||||
// AnalysisPredictor
|
||||
PADDLE_CAPI_EXPORT extern bool PD_PredictorRun(const PD_AnalysisConfig* config,
|
||||
PD_Tensor* inputs, int in_size,
|
||||
PD_Tensor** output_data,
|
||||
int* out_size, int batch_size);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern bool PD_PredictorZeroCopyRun(
|
||||
const PD_AnalysisConfig* config, PD_ZeroCopyData* inputs, int in_size,
|
||||
PD_ZeroCopyData** output, int* out_size);
|
||||
|
||||
// AnalysisConfig
|
||||
enum Precision { kFloat32 = 0, kInt8, kHalf };
|
||||
typedef enum Precision Precision;
|
||||
|
||||
PADDLE_CAPI_EXPORT extern PD_AnalysisConfig* PD_NewAnalysisConfig();
|
||||
|
||||
PADDLE_CAPI_EXPORT extern void PD_DeleteAnalysisConfig(
|
||||
PD_AnalysisConfig* config);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern void PD_SetModel(PD_AnalysisConfig* config,
|
||||
const char* model_dir,
|
||||
const char* params_path);
|
||||
|
||||
PADDLE_CAPI_EXPORT
|
||||
extern void PD_SetProgFile(PD_AnalysisConfig* config, const char* x);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern void PD_SetParamsFile(PD_AnalysisConfig* config,
|
||||
const char* x);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern void PD_SetOptimCacheDir(PD_AnalysisConfig* config,
|
||||
const char* opt_cache_dir);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern const char* PD_ModelDir(
|
||||
const PD_AnalysisConfig* config);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern const char* PD_ProgFile(
|
||||
const PD_AnalysisConfig* config);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern const char* PD_ParamsFile(
|
||||
const PD_AnalysisConfig* config);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern void PD_EnableUseGpu(PD_AnalysisConfig* config,
|
||||
int memory_pool_init_size_mb,
|
||||
int device_id);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern void PD_DisableGpu(PD_AnalysisConfig* config);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern bool PD_UseGpu(const PD_AnalysisConfig* config);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern int PD_GpuDeviceId(const PD_AnalysisConfig* config);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern int PD_MemoryPoolInitSizeMb(
|
||||
const PD_AnalysisConfig* config);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern float PD_FractionOfGpuMemoryForPool(
|
||||
const PD_AnalysisConfig* config);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern void PD_EnableCUDNN(PD_AnalysisConfig* config);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern bool PD_CudnnEnabled(const PD_AnalysisConfig* config);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern void PD_SwitchIrOptim(PD_AnalysisConfig* config,
|
||||
bool x);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern bool PD_IrOptim(const PD_AnalysisConfig* config);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern void PD_SwitchUseFeedFetchOps(
|
||||
PD_AnalysisConfig* config, bool x);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern bool PD_UseFeedFetchOpsEnabled(
|
||||
const PD_AnalysisConfig* config);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern void PD_SwitchSpecifyInputNames(
|
||||
PD_AnalysisConfig* config, bool x);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern bool PD_SpecifyInputName(
|
||||
const PD_AnalysisConfig* config);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern void PD_EnableTensorRtEngine(
|
||||
PD_AnalysisConfig* config, int workspace_size, int max_batch_size,
|
||||
int min_subgraph_size, Precision precision, bool use_static,
|
||||
bool use_calib_mode);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern bool PD_TensorrtEngineEnabled(
|
||||
const PD_AnalysisConfig* config);
|
||||
|
||||
// PD_MaxInputShape pairs an input name with a shape; no function declared in
// this header takes it.
typedef struct PD_MaxInputShape {
|
||||
// Input name.
char* name;
|
||||
// Dimensions.
int* shape;
|
||||
// Presumably the number of entries in shape -- TODO confirm.
int shape_size;
|
||||
} PD_MaxInputShape;
|
||||
|
||||
PADDLE_CAPI_EXPORT extern void PD_SwitchIrDebug(PD_AnalysisConfig* config,
|
||||
bool x);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern void PD_EnableMKLDNN(PD_AnalysisConfig* config);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern void PD_SetMkldnnCacheCapacity(
|
||||
PD_AnalysisConfig* config, int capacity);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern bool PD_MkldnnEnabled(
|
||||
const PD_AnalysisConfig* config);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern void PD_SetCpuMathLibraryNumThreads(
|
||||
PD_AnalysisConfig* config, int cpu_math_library_num_threads);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern int PD_CpuMathLibraryNumThreads(
|
||||
const PD_AnalysisConfig* config);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern void PD_EnableMkldnnQuantizer(
|
||||
PD_AnalysisConfig* config);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern bool PD_MkldnnQuantizerEnabled(
|
||||
const PD_AnalysisConfig* config);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern void PD_SetModelBuffer(PD_AnalysisConfig* config,
|
||||
const char* prog_buffer,
|
||||
size_t prog_buffer_size,
|
||||
const char* params_buffer,
|
||||
size_t params_buffer_size);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern bool PD_ModelFromMemory(
|
||||
const PD_AnalysisConfig* config);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern void PD_EnableMemoryOptim(PD_AnalysisConfig* config);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern bool PD_MemoryOptimEnabled(
|
||||
const PD_AnalysisConfig* config);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern void PD_EnableProfile(PD_AnalysisConfig* config);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern bool PD_ProfileEnabled(
|
||||
const PD_AnalysisConfig* config);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern void PD_SetInValid(PD_AnalysisConfig* config);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern bool PD_IsValid(const PD_AnalysisConfig* config);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern void PD_DisableGlogInfo(PD_AnalysisConfig* config);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern void PD_DeletePass(PD_AnalysisConfig* config,
|
||||
char* pass_name);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern PD_Predictor* PD_NewPredictor(
|
||||
const PD_AnalysisConfig* config);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern void PD_DeletePredictor(PD_Predictor* predictor);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern int PD_GetInputNum(const PD_Predictor*);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern int PD_GetOutputNum(const PD_Predictor*);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern const char* PD_GetInputName(const PD_Predictor*, int);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern const char* PD_GetOutputName(const PD_Predictor*,
|
||||
int);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern void PD_SetZeroCopyInput(
|
||||
PD_Predictor* predictor, const PD_ZeroCopyTensor* tensor);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern void PD_GetZeroCopyOutput(PD_Predictor* predictor,
|
||||
PD_ZeroCopyTensor* tensor);
|
||||
|
||||
PADDLE_CAPI_EXPORT extern void PD_ZeroCopyRun(PD_Predictor* predictor);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
|
@ -0,0 +1,52 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"log"
|
||||
"github.com/PaddlePaddle/PaddleOCR/thirdparty/paddleocr-go/ocr"
|
||||
)
|
||||
|
||||
// Command-line options. Each variable is bound to a flag in init and receives
// its final value when main calls flag.Parse.
var (
	// confFile is bound to -config; image to -image; imageDir to -image_dir;
	// port to -port.
	confFile, image, imageDir, port string

	// useServering is bound to -use_servering.
	useServering bool
)
|
||||
|
||||
func init() {
|
||||
flag.StringVar(&confFile, "config", "config/conf.yaml", "config from ocr system. If not given, will use default config.")
|
||||
flag.StringVar(&image, "image", "", "image to predict. if not given, will use image_dir")
|
||||
flag.StringVar(&imageDir, "image_dir", "", "imgs in dir to be predicted. if not given, will check servering")
|
||||
flag.BoolVar(&useServering, "use_servering", false, "whether to use ocr server. [default: false]")
|
||||
flag.StringVar(&port, "port", "18600", "which port to serve ocr server. [default: 18600].")
|
||||
}
|
||||
|
||||
func main() {
|
||||
flag.Parse()
|
||||
sys := ocr.NewOCRSystem(confFile, nil)
|
||||
|
||||
if image != "" {
|
||||
img := ocr.ReadImage(image)
|
||||
defer img.Close()
|
||||
results := sys.PredictOneImage(img)
|
||||
for _, res := range results {
|
||||
log.Println(res)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
if imageDir != "" {
|
||||
results := sys.PredictDirImages(imageDir)
|
||||
for k, vs := range results {
|
||||
log.Printf("======== image: %v =======\n", k)
|
||||
for _, res := range vs {
|
||||
log.Println(res)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if useServering {
|
||||
sys.StartServer(port)
|
||||
}
|
||||
}
|
|
@ -15,6 +15,7 @@
|
|||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
from ppocr.utils.utility import enable_static_mode
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
@ -43,7 +44,6 @@ logger = initial_logger()
|
|||
from ppocr.utils.save_load import init_model
|
||||
|
||||
|
||||
|
||||
def main():
|
||||
startup_prog, eval_program, place, config, _ = program.preprocess()
|
||||
|
||||
|
@ -72,4 +72,5 @@ def main():
|
|||
|
||||
|
||||
if __name__ == '__main__':
|
||||
enable_static_mode()
|
||||
main()
|
||||
|
|
|
@ -191,10 +191,13 @@ if __name__ == "__main__":
|
|||
if count > 0:
|
||||
total_time += elapse
|
||||
count += 1
|
||||
print("Predict time of %s:" % image_file, elapse)
|
||||
logger.info("The predicted time of img: {} is {}:".format(image_file,
|
||||
elapse))
|
||||
src_im = utility.draw_text_det_res(dt_boxes, image_file)
|
||||
img_name_pure = os.path.split(image_file)[-1] # image_file.split("/")[-1]
|
||||
img_path = os.path.join(draw_img_save, "det_res_%s" % img_name_pure)
|
||||
cv2.imwrite(img_path, src_im)
|
||||
img_name_pure = image_file.split("/")[-1]
|
||||
cv2.imwrite(
|
||||
os.path.join(draw_img_save, "det_res_%s" % img_name_pure), src_im)
|
||||
logger.info("The visualized img saved in {}".format(
|
||||
os.path.join(draw_img_save, "det_res_%s" % img_name_pure)))
|
||||
if count > 1:
    # Elapsed time is only accumulated once count > 0 (see the loop above), so
    # the first image contributes nothing to total_time and the mean is taken
    # over the remaining count - 1 images.
    avg_time = total_time / (count - 1)
    print("Avg Time:", avg_time)
    # The logging module %-formats the message with its positional arguments,
    # so the message needs a placeholder. Without one the record fails to
    # format and the average never reaches the log.
    # NOTE(review): assumes `logger` is a standard-library logging.Logger.
    logger.info("Avg Time: %s", avg_time)
|
||||
|
|