Merge remote-tracking branch 'origin/dygraph' into dy1
|
@ -24,4 +24,8 @@ output/
|
|||
|
||||
build/
|
||||
dist/
|
||||
paddleocr.egg-info/
|
||||
paddleocr.egg-info/
|
||||
/deploy/android_demo/app/OpenCV/
|
||||
/deploy/android_demo/app/PaddleLite/
|
||||
/deploy/android_demo/app/.cxx/
|
||||
/deploy/android_demo/app/cache/
|
||||
|
|
10
MANIFEST.in
|
@ -1,7 +1,9 @@
|
|||
include LICENSE.txt
|
||||
include LICENSE
|
||||
include README.md
|
||||
|
||||
recursive-include ppocr/utils *.txt utility.py logging.py
|
||||
recursive-include ppocr/data/ *.py
|
||||
recursive-include ppocr/utils *.txt utility.py logging.py network.py
|
||||
recursive-include ppocr/data *.py
|
||||
recursive-include ppocr/postprocess *.py
|
||||
recursive-include tools/infer *.py
|
||||
recursive-include tools/infer *.py
|
||||
recursive-include ppocr/utils/e2e_utils *.py
|
||||
recursive-include ppstructure *.py
|
|
@ -27,7 +27,12 @@ import json
|
|||
import cv2
|
||||
|
||||
|
||||
|
||||
__dir__ = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
sys.path.append(__dir__)
|
||||
sys.path.append(os.path.abspath(os.path.join(__dir__, '../..')))
|
||||
sys.path.append("..")
|
||||
|
@ -78,7 +83,7 @@ class WindowMixin(object):
|
|||
addActions(menu, actions)
|
||||
return menu
|
||||
|
||||
def toolbar(self, title, actions=None):
|
||||
def toolbar(self, title, actions=None):
|
||||
toolbar = ToolBar(title)
|
||||
toolbar.setObjectName(u'%sToolBar' % title)
|
||||
# toolbar.setOrientation(Qt.Vertical)
|
||||
|
@ -92,13 +97,13 @@ class WindowMixin(object):
|
|||
class MainWindow(QMainWindow, WindowMixin):
|
||||
FIT_WINDOW, FIT_WIDTH, MANUAL_ZOOM = list(range(3))
|
||||
|
||||
def __init__(self, lang="ch", defaultFilename=None, defaultPrefdefClassFile=None, defaultSaveDir=None):
|
||||
def __init__(self, lang="ch", gpu=False, defaultFilename=None, defaultPrefdefClassFile=None, defaultSaveDir=None):
|
||||
super(MainWindow, self).__init__()
|
||||
self.setWindowTitle(__appname__)
|
||||
|
||||
# Load setting in the main thread
|
||||
self.settings = Settings()
|
||||
self.settings.load()
|
||||
self.settings.load()
|
||||
settings = self.settings
|
||||
self.lang = lang
|
||||
# Load string bundle for i18n
|
||||
|
@ -108,7 +113,7 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
getStr = lambda strId: self.stringBundle.getString(strId)
|
||||
|
||||
self.defaultSaveDir = defaultSaveDir
|
||||
self.ocr = PaddleOCR(use_pdserving=False, use_angle_cls=True, det=True, cls=True, use_gpu=False, lang=lang)
|
||||
self.ocr = PaddleOCR(use_pdserving=False, use_angle_cls=True, det=True, cls=True, use_gpu=gpu, lang=lang)
|
||||
|
||||
if os.path.exists('./data/paddle.png'):
|
||||
result = self.ocr.ocr('./data/paddle.png', cls=True, det=True)
|
||||
|
@ -147,6 +152,7 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
self.itemsToShapesbox = {}
|
||||
self.shapesToItemsbox = {}
|
||||
self.prevLabelText = getStr('tempLabel')
|
||||
self.noLabelText = getStr('nullLabel')
|
||||
self.model = 'paddle'
|
||||
self.PPreader = None
|
||||
self.autoSaveNum = 5
|
||||
|
@ -158,7 +164,7 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
filelistLayout = QVBoxLayout()
|
||||
filelistLayout.setContentsMargins(0, 0, 0, 0)
|
||||
filelistLayout.addWidget(self.fileListWidget)
|
||||
|
||||
|
||||
self.AutoRecognition = QToolButton()
|
||||
self.AutoRecognition.setToolButtonStyle(Qt.ToolButtonTextBesideIcon)
|
||||
self.AutoRecognition.setIcon(newIcon('Auto'))
|
||||
|
@ -175,7 +181,7 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
self.filedock.setObjectName(getStr('files'))
|
||||
self.filedock.setWidget(fileListContainer)
|
||||
self.addDockWidget(Qt.LeftDockWidgetArea, self.filedock)
|
||||
|
||||
|
||||
######## Right area ##########
|
||||
listLayout = QVBoxLayout()
|
||||
listLayout.setContentsMargins(0, 0, 0, 0)
|
||||
|
@ -249,7 +255,7 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
self.imgsplider.setMaximum(150)
|
||||
self.imgsplider.setSingleStep(1)
|
||||
self.imgsplider.setTickPosition(QSlider.TicksBelow)
|
||||
self.imgsplider.setTickInterval(1)
|
||||
self.imgsplider.setTickInterval(1)
|
||||
op = QGraphicsOpacityEffect()
|
||||
op.setOpacity(0.2)
|
||||
self.imgsplider.setGraphicsEffect(op)
|
||||
|
@ -265,7 +271,9 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
self.zoomWidget = ZoomWidget()
|
||||
self.colorDialog = ColorDialog(parent=self)
|
||||
self.zoomWidgetValue = self.zoomWidget.value()
|
||||
|
||||
|
||||
self.msgBox = QMessageBox()
|
||||
|
||||
########## thumbnail #########
|
||||
hlayout = QHBoxLayout()
|
||||
m = (0, 0, 0, 0)
|
||||
|
@ -293,7 +301,7 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
self.nextButton.setStyleSheet('border: none;')
|
||||
self.nextButton.clicked.connect(self.openNextImg)
|
||||
self.nextButton.setShortcut('d')
|
||||
|
||||
|
||||
hlayout.addWidget(self.preButton)
|
||||
hlayout.addWidget(self.iconlist)
|
||||
hlayout.addWidget(self.nextButton)
|
||||
|
@ -302,7 +310,7 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
iconListContainer = QWidget()
|
||||
iconListContainer.setLayout(hlayout)
|
||||
iconListContainer.setFixedHeight(100)
|
||||
|
||||
|
||||
########### Canvas ###########
|
||||
self.canvas = Canvas(parent=self)
|
||||
self.canvas.zoomRequest.connect(self.zoomRequest)
|
||||
|
@ -359,6 +367,9 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
opendir = action(getStr('openDir'), self.openDirDialog,
|
||||
'Ctrl+u', 'open', getStr('openDir'))
|
||||
|
||||
open_dataset_dir = action(getStr('openDatasetDir'), self.openDatasetDirDialog,
|
||||
'Ctrl+p', 'open', getStr('openDatasetDir'), enabled=False)
|
||||
|
||||
save = action(getStr('save'), self.saveFile,
|
||||
'Ctrl+V', 'verify', getStr('saveDetail'), enabled=False)
|
||||
|
||||
|
@ -397,6 +408,7 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
help = action(getStr('tutorial'), self.showTutorialDialog, None, 'help', getStr('tutorialDetail'))
|
||||
showInfo = action(getStr('info'), self.showInfoDialog, None, 'help', getStr('info'))
|
||||
showSteps = action(getStr('steps'), self.showStepsDialog, None, 'help', getStr('steps'))
|
||||
showKeys = action(getStr('keys'), self.showKeysDialog, None, 'help', getStr('keys'))
|
||||
|
||||
zoom = QWidgetAction(self)
|
||||
zoom.setDefaultWidget(self.zoomWidget)
|
||||
|
@ -437,7 +449,7 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
AutoRec = action(getStr('autoRecognition'), self.autoRecognition,
|
||||
'', 'Auto', getStr('autoRecognition'), enabled=False)
|
||||
|
||||
reRec = action(getStr('reRecognition'), self.reRecognition,
|
||||
reRec = action(getStr('reRecognition'), self.reRecognition,
|
||||
'Ctrl+Shift+R', 'reRec', getStr('reRecognition'), enabled=False)
|
||||
|
||||
singleRere = action(getStr('singleRe'), self.singleRerecognition,
|
||||
|
@ -455,6 +467,12 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
undoLastPoint = action(getStr("undoLastPoint"), self.canvas.undoLastPoint,
|
||||
'Ctrl+Z', "undo", getStr("undoLastPoint"), enabled=False)
|
||||
|
||||
rotateLeft = action(getStr("rotateLeft"), partial(self.rotateImgAction,1),
|
||||
'Ctrl+Alt+L', "rotateLeft", getStr("rotateLeft"), enabled=False)
|
||||
|
||||
rotateRight = action(getStr("rotateRight"), partial(self.rotateImgAction,-1),
|
||||
'Ctrl+Alt+R', "rotateRight", getStr("rotateRight"), enabled=False)
|
||||
|
||||
undo = action(getStr("undo"), self.undoShapeEdit,
|
||||
'Ctrl+Z', "undo", getStr("undo"), enabled=False)
|
||||
|
||||
|
@ -518,13 +536,14 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
zoom=zoom, zoomIn=zoomIn, zoomOut=zoomOut, zoomOrg=zoomOrg,
|
||||
fitWindow=fitWindow, fitWidth=fitWidth,
|
||||
zoomActions=zoomActions, saveLabel=saveLabel,
|
||||
undo=undo, undoLastPoint=undoLastPoint,
|
||||
undo=undo, undoLastPoint=undoLastPoint,open_dataset_dir=open_dataset_dir,
|
||||
rotateLeft=rotateLeft,rotateRight=rotateRight,
|
||||
fileMenuActions=(
|
||||
opendir, saveLabel, resetAll, quit),
|
||||
opendir, open_dataset_dir, saveLabel, resetAll, quit),
|
||||
beginner=(), advanced=(),
|
||||
editMenu=(createpoly, edit, copy, delete,singleRere,None, undo, undoLastPoint,
|
||||
None, color1, self.drawSquaresOption),
|
||||
beginnerContext=(create, edit, copy, delete, singleRere),
|
||||
None, rotateLeft, rotateRight, None, color1, self.drawSquaresOption),
|
||||
beginnerContext=(create, edit, copy, delete, singleRere, rotateLeft, rotateRight,),
|
||||
advancedContext=(createMode, editMode, edit, copy,
|
||||
delete, shapeLineColor, shapeFillColor),
|
||||
onLoadActive=(
|
||||
|
@ -562,9 +581,9 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
self.autoSaveOption.triggered.connect(self.autoSaveFunc)
|
||||
|
||||
addActions(self.menus.file,
|
||||
(opendir, None, saveLabel, saveRec, self.autoSaveOption, None, resetAll, deleteImg, quit))
|
||||
(opendir, open_dataset_dir, None, saveLabel, saveRec, self.autoSaveOption, None, resetAll, deleteImg, quit))
|
||||
|
||||
addActions(self.menus.help, (showSteps, showInfo))
|
||||
addActions(self.menus.help, (showKeys,showSteps, showInfo))
|
||||
addActions(self.menus.view, (
|
||||
self.displayLabelOption, self.labelDialogOption,
|
||||
None,
|
||||
|
@ -759,6 +778,10 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
msg = stepsInfo(self.lang)
|
||||
QMessageBox.information(self, u'Information', msg)
|
||||
|
||||
def showKeysDialog(self):
    """Show the keyboard-shortcut help text for the current UI language."""
    QMessageBox.information(self, u'Information', keysInfo(self.lang))
|
||||
|
||||
def createShape(self):
|
||||
assert self.beginner()
|
||||
self.canvas.setEditing(False)
|
||||
|
@ -772,6 +795,38 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
self.actions.create.setEnabled(False)
|
||||
self.actions.undoLastPoint.setEnabled(True)
|
||||
|
||||
def rotateImg(self, filename, k, _value):
    """Rotate the image file on disk by ``k`` quarter turns and reload it.

    Args:
        filename: Path of the image. The file is overwritten in place with
            the rotated pixels.
        k: Number of 90-degree rotations, passed straight to ``np.rot90``.
        _value: Enabled state applied to the rotate-right action.
    """
    self.actions.rotateRight.setEnabled(_value)

    pix = cv2.imread(filename)
    if pix is None:
        # cv2.imread reports an unreadable or undecodable file by returning
        # None instead of raising; rotating None would fail inside numpy,
        # so bail out and leave the file on disk untouched.
        print('Can not read the image to rotate: ', filename)
        return

    pix = np.rot90(pix, k)
    cv2.imwrite(filename, pix)

    # Refresh the canvas and reload so the rotated image is displayed.
    self.canvas.update()
    self.loadFile(filename)
|
||||
|
||||
def rotateImgWarn(self):
    """Warn that rotating an image with label boxes will disrupt the labels.

    The message is shown in Chinese when ``self.lang == 'ch'`` and in
    English otherwise.
    """
    if self.lang == 'ch':
        self.msgBox.warning(self, "提示", "\n 该图片已经有标注框,旋转操作会打乱标注,建议清除标注框后旋转。")
    else:
        # Adjacent string literals are joined by the parser. A backslash
        # continuation *inside* a literal would instead embed the next
        # line's leading whitespace into the displayed message.
        self.msgBox.warning(
            self, "Warn",
            "\n The picture already has a label box, and rotation will disrupt the label. "
            "It is recommended to clear the label box and rotate it.")
|
||||
|
||||
def rotateImgAction(self, k=1, _value=False):
    """Handle a rotate-left / rotate-right request for the current image.

    Args:
        k: Number of quarter turns forwarded to ``rotateImg``.
        _value: Unused here; kept so the method can be connected to a Qt
            signal that passes a checked flag.

    The image is rotated only when the file exists and no label boxes are
    registered in ``self.itemsToShapesbox``; otherwise a warning is shown.
    If the file is missing, both rotate actions are also disabled.
    """
    try:
        filename = self.mImgList[self.currIndex]
    except IndexError:
        # No image is loaded (empty list or stale index): nothing to rotate.
        return

    if not os.path.exists(filename):
        self.rotateImgWarn()
        self.actions.rotateRight.setEnabled(False)
        self.actions.rotateLeft.setEnabled(False)
        return

    if self.itemsToShapesbox:
        # Rotating would invalidate the existing label boxes.
        self.rotateImgWarn()
        return

    # Persist pending edits first, because rotateImg reloads the file.
    self.saveFile()
    self.dirty = False
    self.rotateImg(filename=filename, k=k, _value=True)
|
||||
|
||||
def toggleDrawingSensitive(self, drawing=True):
|
||||
"""In the middle of drawing, toggling between modes should be disabled."""
|
||||
self.actions.editMode.setEnabled(not drawing)
|
||||
|
@ -879,7 +934,12 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
self.updateComboBox()
|
||||
|
||||
def updateBoxlist(self):
|
||||
for shape in self.canvas.selectedShapes+[self.canvas.hShape]:
|
||||
self.canvas.selectedShapes_hShape = []
|
||||
if self.canvas.hShape != None:
|
||||
self.canvas.selectedShapes_hShape = self.canvas.selectedShapes + [self.canvas.hShape]
|
||||
else:
|
||||
self.canvas.selectedShapes_hShape = self.canvas.selectedShapes
|
||||
for shape in self.canvas.selectedShapes_hShape:
|
||||
item = self.shapesToItemsbox[shape] # listitem
|
||||
text = [(int(p.x()), int(p.y())) for p in shape.points]
|
||||
item.setText(str(text))
|
||||
|
@ -1020,7 +1080,7 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
item.setText(str([(int(p.x()), int(p.y())) for p in shape.points]))
|
||||
self.updateComboBox()
|
||||
|
||||
def updateComboBox(self): # TODO:貌似没用
|
||||
def updateComboBox(self):
|
||||
# Get the unique labels and add them to the Combobox.
|
||||
itemsTextList = [str(self.labelList.item(i).text()) for i in range(self.labelList.count())]
|
||||
|
||||
|
@ -1040,7 +1100,7 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
return dict(label=s.label, # str
|
||||
line_color=s.line_color.getRgb(),
|
||||
fill_color=s.fill_color.getRgb(),
|
||||
points=[(p.x(), p.y()) for p in s.points], # QPonitF
|
||||
points=[(int(p.x()), int(p.y())) for p in s.points], # QPonitF
|
||||
# add chris
|
||||
difficult=s.difficult) # bool
|
||||
|
||||
|
@ -1069,7 +1129,7 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
# print('Image:{0} -> Annotation:{1}'.format(self.filePath, annotationFilePath))
|
||||
return True
|
||||
except:
|
||||
self.errorMessage(u'Error saving label data')
|
||||
self.errorMessage(u'Error saving label data', u'Error saving label data')
|
||||
return False
|
||||
|
||||
def copySelectedShape(self):
|
||||
|
@ -1238,6 +1298,8 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
|
||||
def loadFile(self, filePath=None):
|
||||
"""Load the specified file, or the last opened file if None."""
|
||||
if self.dirty:
|
||||
self.mayContinue()
|
||||
self.resetState()
|
||||
self.canvas.setEnabled(False)
|
||||
if filePath is None:
|
||||
|
@ -1266,7 +1328,7 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
titem = self.iconlist.item(i)
|
||||
titem.setSelected(True)
|
||||
self.iconlist.scrollToItem(titem)
|
||||
break
|
||||
break
|
||||
else:
|
||||
self.fileListWidget.clear()
|
||||
self.mImgList.clear()
|
||||
|
@ -1274,7 +1336,7 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
|
||||
# if unicodeFilePath and self.iconList.count() > 0:
|
||||
# if unicodeFilePath in self.mImgList:
|
||||
|
||||
|
||||
if unicodeFilePath and os.path.exists(unicodeFilePath):
|
||||
self.canvas.verified = False
|
||||
|
||||
|
@ -1305,7 +1367,7 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
self.addRecentFile(self.filePath)
|
||||
self.toggleActions(True)
|
||||
self.showBoundingBoxFromPPlabel(filePath)
|
||||
|
||||
|
||||
self.setWindowTitle(__appname__ + ' ' + filePath)
|
||||
|
||||
# Default : select last item if there is at least one item
|
||||
|
@ -1317,7 +1379,7 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
return True
|
||||
return False
|
||||
|
||||
|
||||
|
||||
def showBoundingBoxFromPPlabel(self, filePath):
|
||||
imgidx = self.getImglabelidx(filePath)
|
||||
if imgidx not in self.PPlabel.keys():
|
||||
|
@ -1410,6 +1472,7 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
|
||||
def loadRecent(self, filename):
|
||||
if self.mayContinue():
|
||||
print(filename,"======")
|
||||
self.loadFile(filename)
|
||||
|
||||
def scanAllImages(self, folderPath):
|
||||
|
@ -1445,6 +1508,23 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
self.lastOpenDir = targetDirPath
|
||||
self.importDirImages(targetDirPath)
|
||||
|
||||
def openDatasetDirDialog(self,):
    """Open the last-opened dataset folder in the system file browser.

    If ``self.lastOpenDir`` is unset or no longer exists, a warning is shown
    in the UI language and the "open dataset dir" action is disabled.
    """
    import subprocess  # local import: only this action needs it

    if self.lastOpenDir and os.path.exists(self.lastOpenDir):
        system = platform.system()
        if system == 'Windows':
            os.startfile(self.lastOpenDir)
        else:
            # 'open' exists only on macOS; Linux desktops provide 'xdg-open'.
            # An argument list (no shell) keeps paths with spaces or shell
            # metacharacters intact.
            opener = 'open' if system == 'Darwin' else 'xdg-open'
            try:
                subprocess.call([opener, os.path.normpath(self.lastOpenDir)])
            except OSError as e:
                # Opener not installed: report it instead of crashing the UI.
                print('Can not open the dataset folder: ', e)
    else:
        if self.lang == 'ch':
            self.msgBox.warning(self, "提示", "\n 原文件夹已不存在,请从新选择数据集路径!")
        else:
            self.msgBox.warning(self, "Warn", "\n The original folder no longer exists, please choose the data set path again!")

        self.actions.open_dataset_dir.setEnabled(False)
|
||||
|
||||
def importDirImages(self, dirpath, isDelete = False):
|
||||
if not self.mayContinue() or not dirpath:
|
||||
return
|
||||
|
@ -1492,6 +1572,10 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
self.reRecogButton.setEnabled(True)
|
||||
self.actions.AutoRec.setEnabled(True)
|
||||
self.actions.reRec.setEnabled(True)
|
||||
self.actions.open_dataset_dir.setEnabled(True)
|
||||
self.actions.rotateLeft.setEnabled(True)
|
||||
self.actions.rotateRight.setEnabled(True)
|
||||
|
||||
|
||||
|
||||
def openPrevImg(self, _value=False):
|
||||
|
@ -1500,7 +1584,7 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
|
||||
if self.filePath is None:
|
||||
return
|
||||
|
||||
|
||||
currIndex = self.mImgList.index(self.filePath)
|
||||
self.mImgList5 = self.mImgList[:5]
|
||||
if currIndex - 1 >= 0:
|
||||
|
@ -1530,7 +1614,7 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
if filename:
|
||||
print('file name in openNext is ',filename)
|
||||
self.loadFile(filename)
|
||||
|
||||
|
||||
def updateFileListIcon(self, filename):
|
||||
pass
|
||||
|
||||
|
@ -1642,7 +1726,7 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
proc.startDetached(os.path.abspath(__file__))
|
||||
|
||||
def mayContinue(self): #
|
||||
if not self.dirty:
|
||||
if not self.dirty:
|
||||
return True
|
||||
else:
|
||||
discardChanges = self.discardChangesDialog()
|
||||
|
@ -1802,10 +1886,14 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
result.insert(0, box)
|
||||
print('result in reRec is ', result)
|
||||
self.result_dic.append(result)
|
||||
if result[1][0] == shape.label:
|
||||
print('label no change')
|
||||
else:
|
||||
rec_flag += 1
|
||||
else:
|
||||
print('Can not recognise the box')
|
||||
self.result_dic.append([box,(self.noLabelText,0)])
|
||||
|
||||
if self.noLabelText == shape.label or result[1][0] == shape.label:
|
||||
print('label no change')
|
||||
else:
|
||||
rec_flag += 1
|
||||
|
||||
if len(self.result_dic) > 0 and rec_flag > 0:
|
||||
self.saveFile(mode='Auto')
|
||||
|
@ -1836,9 +1924,14 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
print('label no change')
|
||||
else:
|
||||
shape.label = result[1][0]
|
||||
self.singleLabel(shape)
|
||||
self.setDirty()
|
||||
print(box)
|
||||
else:
|
||||
print('Can not recognise the box')
|
||||
if self.noLabelText == shape.label:
|
||||
print('label no change')
|
||||
else:
|
||||
shape.label = self.noLabelText
|
||||
self.singleLabel(shape)
|
||||
self.setDirty()
|
||||
|
||||
def autolcm(self):
|
||||
vbox = QVBoxLayout()
|
||||
|
@ -2027,6 +2120,8 @@ def read(filename, default=None):
|
|||
except:
|
||||
return default
|
||||
|
||||
def str2bool(v):
    """Convert a command-line style value to a bool.

    Returns True for "true", "t" or "1" (case-insensitive, surrounding
    whitespace ignored). Real bools are returned unchanged. Every other
    value yields False.
    """
    if isinstance(v, bool):
        # Already a bool (e.g. passed programmatically): nothing to parse.
        return v
    return str(v).strip().lower() in ("true", "t", "1")
|
||||
|
||||
def get_main_app(argv=[]):
|
||||
"""
|
||||
|
@ -2038,13 +2133,14 @@ def get_main_app(argv=[]):
|
|||
app.setWindowIcon(newIcon("app"))
|
||||
# Tzutalin 201705+: Accept extra agruments to change predefined class file
|
||||
argparser = argparse.ArgumentParser()
|
||||
argparser.add_argument("--lang", default='en', nargs="?")
|
||||
argparser.add_argument("--lang", type=str, default='en', nargs="?")
|
||||
argparser.add_argument("--gpu", type=str2bool, default=False, nargs="?")
|
||||
argparser.add_argument("--predefined_classes_file",
|
||||
default=os.path.join(os.path.dirname(__file__), "data", "predefined_classes.txt"),
|
||||
nargs="?")
|
||||
args = argparser.parse_args(argv[1:])
|
||||
# Usage : labelImg.py image predefClassFile saveDir
|
||||
win = MainWindow(lang=args.lang,
|
||||
win = MainWindow(lang=args.lang, gpu=args.gpu,
|
||||
defaultPrefdefClassFile=args.predefined_classes_file)
|
||||
win.show()
|
||||
return app, win
|
||||
|
@ -2057,7 +2153,7 @@ def main():
|
|||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
|
||||
resource_file = './libs/resources.py'
|
||||
if not os.path.exists(resource_file):
|
||||
output = os.system('pyrcc5 -o libs/resources.py resources.qrc')
|
||||
|
|
|
@ -8,9 +8,12 @@ PPOCRLabel is a semi-automatic graphic annotation tool suitable for OCR field, w
|
|||
|
||||
### Recent Update
|
||||
|
||||
- 2021.8.11:
|
||||
- New functions: Open the dataset folder, image rotation (Note: Please delete the label box before rotating the image) (by [Wei-JL](https://github.com/Wei-JL))
|
||||
- Added shortcut key description (Help-Shortcut Key), repaired the direction shortcut key movement function under batch processing (by [d2623587501](https://github.com/d2623587501))
|
||||
- 2021.2.5: New batch processing and undo functions (by [Evezerest](https://github.com/Evezerest)):
|
||||
- Batch processing function: Press and hold the Ctrl key to select the box, you can move, copy, and delete in batches.
|
||||
- Undo function: In the process of drawing a four-point label box or after editing the box, press Ctrl+Z to undo the previous operation.
|
||||
- **Batch processing function**: Press and hold the Ctrl key to select the box, you can move, copy, and delete in batches.
|
||||
- **Undo function**: In the process of drawing a four-point label box or after editing the box, press Ctrl+Z to undo the previous operation.
|
||||
- Fix image rotation and size problems, optimize the process of editing the mark frame (by [ninetailskim](https://github.com/ninetailskim), [edencfc](https://github.com/edencfc)).
|
||||
- 2021.1.11: Optimize the labeling experience (by [edencfc](https://github.com/edencfc)),
|
||||
- Users can choose whether to pop up the label input dialog after drawing the detection box in "View - Pop-up Label Input Dialog".
|
||||
|
@ -23,17 +26,51 @@ PPOCRLabel is a semi-automatic graphic annotation tool suitable for OCR field, w
|
|||
|
||||
## Installation
|
||||
|
||||
### 1. Install PaddleOCR
|
||||
### 1. Environment Preparation
|
||||
|
||||
PaddleOCR models has been built in PPOCRLabel, please refer to [PaddleOCR installation document](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_ch/installation.md) to prepare PaddleOCR and make sure it works.
|
||||
#### **Install PaddlePaddle 2.0**
|
||||
|
||||
```bash
|
||||
pip3 install --upgrade pip
|
||||
|
||||
# If you have cuda9 or cuda10 installed on your machine, please run the following command to install
|
||||
python3 -m pip install paddlepaddle-gpu==2.0.0 -i https://mirror.baidu.com/pypi/simple
|
||||
|
||||
# If you only have cpu on your machine, please run the following command to install
|
||||
python3 -m pip install paddlepaddle==2.0.0 -i https://mirror.baidu.com/pypi/simple
|
||||
```
|
||||
|
||||
For more software version requirements, please refer to the instructions in [Installation Document](https://www.paddlepaddle.org.cn/install/quick) for operation.
|
||||
|
||||
#### **Install PaddleOCR**
|
||||
|
||||
```bash
|
||||
# Recommend
|
||||
git clone https://github.com/PaddlePaddle/PaddleOCR
|
||||
|
||||
# If you cannot pull successfully due to network problems, you can also choose to use the code hosting on the cloud:
|
||||
|
||||
git clone https://gitee.com/paddlepaddle/PaddleOCR
|
||||
|
||||
# Note: The cloud-hosting code may not be able to synchronize the update with this GitHub project in real time. There might be a delay of 3-5 days. Please give priority to the recommended method.
|
||||
```
|
||||
|
||||
#### **Install Third-party Libraries**
|
||||
|
||||
```bash
|
||||
cd PaddleOCR
|
||||
pip3 install -r requirements.txt
|
||||
```
|
||||
|
||||
If you get the error `OSError: [WinError 126] The specified module could not be found` when installing Shapely on Windows, try downloading the Shapely whl file from http://www.lfd.uci.edu/~gohlke/pythonlibs/#shapely.
|
||||
|
||||
Reference: [Solve shapely installation on windows](https://stackoverflow.com/questions/44398265/install-shapely-oserror-winerror-126-the-specified-module-could-not-be-found)
|
||||
|
||||
### 2. Install PPOCRLabel
|
||||
|
||||
#### Windows + Anaconda
|
||||
#### Windows
|
||||
|
||||
Download and install [Anaconda](https://www.anaconda.com/download/#download) (Python 3+)
|
||||
|
||||
```
|
||||
```bash
|
||||
pip install pyqt5
|
||||
cd ./PPOCRLabel # Change the directory to the PPOCRLabel folder
|
||||
python PPOCRLabel.py
|
||||
|
@ -41,15 +78,15 @@ python PPOCRLabel.py
|
|||
|
||||
#### Ubuntu Linux
|
||||
|
||||
```
|
||||
```bash
|
||||
pip3 install pyqt5
|
||||
pip3 install trash-cli
|
||||
cd ./PPOCRLabel # Change the directory to the PPOCRLabel folder
|
||||
python3 PPOCRLabel.py
|
||||
```
|
||||
|
||||
#### macOS
|
||||
```
|
||||
#### MacOS
|
||||
```bash
|
||||
pip3 install pyqt5
|
||||
pip3 uninstall opencv-python # Uninstall opencv manually as it conflicts with pyqt
|
||||
pip3 install opencv-contrib-python-headless==4.2.0.32 # Install the headless version of opencv
|
||||
|
@ -79,11 +116,11 @@ python3 PPOCRLabel.py
|
|||
|
||||
7. Double click the result in 'recognition result' list to manually change inaccurate recognition results.
|
||||
|
||||
8. Click "Check", the image status will switch to "√",then the program automatically jump to the next.
|
||||
8. **Click "Check", the image status will switch to "√", then the program automatically jumps to the next image.**
|
||||
|
||||
9. Click "Delete Image" and the image will be deleted to the recycle bin.
|
||||
|
||||
10. Labeling result: the user can save manually through the menu "File - Save Label", while the program will also save automatically if "File - Auto Save Label Mode" is selected. The manually checked label will be stored in *Label.txt* under the opened picture folder. Click "PaddleOCR"-"Save Recognition Results" in the menu bar, the recognition training data of such pictures will be saved in the *crop_img* folder, and the recognition label will be saved in *rec_gt.txt*<sup>[4]</sup>.
|
||||
10. Labeling result: the user can export the label result manually through the menu "File - Export Label", while the program will also export automatically if "File - Auto export Label Mode" is selected. The manually checked label will be stored in *Label.txt* under the opened picture folder. Click "File"-"Export Recognition Results" in the menu bar, the recognition training data of such pictures will be saved in the *crop_img* folder, and the recognition label will be saved in *rec_gt.txt*<sup>[4]</sup>.
|
||||
|
||||
### Note
|
||||
|
||||
|
@ -97,10 +134,10 @@ python3 PPOCRLabel.py
|
|||
|
||||
| File name | Description |
|
||||
| :-----------: | :----------------------------------------------------------: |
|
||||
| Label.txt | The detection label file can be directly used for PPOCR detection model training. After the user saves 5 label results, the file will be automatically saved. It will also be written when the user closes the application or changes the file folder. |
|
||||
| Label.txt | The detection label file can be directly used for PPOCR detection model training. After the user saves 5 label results, the file will be automatically exported. It will also be written when the user closes the application or changes the file folder. |
|
||||
| fileState.txt | The picture status file saves the names of the images in the current folder that have been manually confirmed by the user. |
|
||||
| Cache.cach | Cache files to save the results of model recognition. |
|
||||
| rec_gt.txt | The recognition label file, which can be directly used for PPOCR identification model training, is generated after the user clicks on the menu bar "File"-"Save recognition result". |
|
||||
| rec_gt.txt | The recognition label file, which can be directly used for PPOCR identification model training, is generated after the user clicks on the menu bar "File"-"Export recognition result". |
|
||||
| crop_img | The recognition data, generated at the same time with *rec_gt.txt* |
|
||||
|
||||
## Explanation
|
||||
|
@ -134,16 +171,16 @@ python3 PPOCRLabel.py
|
|||
|
||||
- Custom model: The model trained by users can be replaced by modifying PPOCRLabel.py in [PaddleOCR class instantiation](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/PPOCRLabel/PPOCRLabel.py#L110) referring [Custom Model Code](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_en/whl_en.md#use-custom-model)
|
||||
|
||||
### Save
|
||||
### Export Label Result
|
||||
|
||||
PPOCRLabel supports three ways to save Label.txt
|
||||
PPOCRLabel supports three ways to export Label.txt
|
||||
|
||||
- Automatically save: After selecting "File - Auto Save Label Mode", the program will automatically write the annotations into Label.txt every time the user confirms an image. If this option is not turned on, it will be automatically saved after detecting that the user has manually checked 5 images.
|
||||
- Manual save: Click "File-Save Marking Results" to manually save the label.
|
||||
- Close application save
|
||||
- Automatically export: After selecting "File - Auto Export Label Mode", the program will automatically write the annotations into Label.txt every time the user confirms an image. If this option is not turned on, it will be automatically exported after detecting that the user has manually checked 5 images.
|
||||
- Manual export: Click "File-Export Marking Results" to manually export the label.
|
||||
- Close application export
|
||||
|
||||
|
||||
### Export partial recognition results
|
||||
### Export Partial Recognition Results
|
||||
|
||||
For some data that are difficult to recognize, the recognition results will not be exported by **unchecking** the corresponding tags in the recognition results checkbox.
|
||||
|
||||
|
|
|
@ -8,9 +8,12 @@ PPOCRLabel是一款适用于OCR领域的半自动化图形标注工具,内置P
|
|||
|
||||
#### 近期更新
|
||||
|
||||
- 2021.8.11:
|
||||
- 新增功能:打开数据所在文件夹、图像旋转(注意:旋转前的图片上不能存在标记框)(by [Wei-JL](https://github.com/Wei-JL))
|
||||
- 新增快捷键说明(帮助-快捷键)、修复批处理下的方向快捷键移动功能(by [d2623587501](https://github.com/d2623587501))
|
||||
- 2021.2.5:新增批处理与撤销功能(by [Evezerest](https://github.com/Evezerest))
|
||||
- 批处理功能:按住Ctrl键选择标记框后可批量移动、复制、删除。
|
||||
- 撤销功能:在绘制四点标注框过程中或对框进行编辑操作后,按下Ctrl+Z可撤销上一部操作。
|
||||
- **批处理功能**:按住Ctrl键选择标记框后可批量移动、复制、删除、重新识别。
|
||||
- **撤销功能**:在绘制四点标注框过程中或对框进行编辑操作后,按下Ctrl+Z可撤销上一步操作。
|
||||
- 修复图像旋转和尺寸问题、优化编辑标记框过程(by [ninetailskim](https://github.com/ninetailskim)、 [edencfc](https://github.com/edencfc))
|
||||
- 2021.1.11:优化标注体验(by [edencfc](https://github.com/edencfc)):
|
||||
- 用户可在“视图 - 弹出标记输入框”选择在画完检测框后标记输入框是否弹出。
|
||||
|
@ -27,13 +30,48 @@ PPOCRLabel是一款适用于OCR领域的半自动化图形标注工具,内置P
|
|||
|
||||
## 安装
|
||||
|
||||
### 1. 安装PaddleOCR
|
||||
PPOCRLabel内置PaddleOCR模型,故请参考[PaddleOCR安装文档](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_ch/installation.md)准备好PaddleOCR,并确保PaddleOCR安装成功。
|
||||
### 1. 环境搭建
|
||||
#### 安装PaddlePaddle
|
||||
|
||||
```bash
|
||||
pip3 install --upgrade pip
|
||||
|
||||
# 如果您的机器安装的是CUDA9或CUDA10,请运行以下命令安装
|
||||
python3 -m pip install paddlepaddle-gpu==2.0.0 -i https://mirror.baidu.com/pypi/simple
|
||||
|
||||
# 如果您的机器是CPU,请运行以下命令安装
|
||||
|
||||
python3 -m pip install paddlepaddle==2.0.0 -i https://mirror.baidu.com/pypi/simple
|
||||
```
|
||||
|
||||
更多的版本需求,请参照[安装文档](https://www.paddlepaddle.org.cn/install/quick)中的说明进行操作。
|
||||
|
||||
#### **安装PaddleOCR**
|
||||
|
||||
```bash
|
||||
git clone https://github.com/PaddlePaddle/PaddleOCR  # 【推荐】
|
||||
|
||||
# 如果因为网络问题无法pull成功,也可选择使用码云上的托管:
|
||||
|
||||
git clone https://gitee.com/paddlepaddle/PaddleOCR
|
||||
|
||||
# 注:码云托管代码可能无法实时同步本github项目更新,存在3~5天延时,请优先使用推荐方式。
|
||||
```
|
||||
|
||||
#### 安装第三方库
|
||||
|
||||
```bash
|
||||
cd PaddleOCR
|
||||
pip3 install -r requirements.txt
|
||||
```
|
||||
|
||||
注意,windows环境下,建议从[这里](https://www.lfd.uci.edu/~gohlke/pythonlibs/#shapely)下载shapely安装包完成安装, 直接通过pip安装的shapely库可能出现`[WinError 126] 找不到指定模块`的问题。
|
||||
|
||||
### 2. 安装PPOCRLabel
|
||||
#### Windows + Anaconda
|
||||
|
||||
```
|
||||
#### Windows
|
||||
|
||||
```bash
|
||||
pip install pyqt5
|
||||
cd ./PPOCRLabel # 将目录切换到PPOCRLabel文件夹下
|
||||
python PPOCRLabel.py --lang ch
|
||||
|
@ -41,15 +79,15 @@ python PPOCRLabel.py --lang ch
|
|||
|
||||
#### Ubuntu Linux
|
||||
|
||||
```
|
||||
```bash
|
||||
pip3 install pyqt5
|
||||
pip3 install trash-cli
|
||||
cd ./PPOCRLabel # 将目录切换到PPOCRLabel文件夹下
|
||||
python3 PPOCRLabel.py --lang ch
|
||||
```
|
||||
|
||||
#### macOS
|
||||
```
|
||||
#### MacOS
|
||||
```bash
|
||||
pip3 install pyqt5
|
||||
pip3 uninstall opencv-python # 由于mac版本的opencv与pyqt有冲突,需先手动卸载opencv
|
||||
pip3 install opencv-contrib-python-headless==4.2.0.32 # 安装headless版本的open-cv
|
||||
|
@ -57,6 +95,8 @@ cd ./PPOCRLabel # 将目录切换到PPOCRLabel文件夹下
|
|||
python3 PPOCRLabel.py --lang ch
|
||||
```
|
||||
|
||||
|
||||
|
||||
## 使用
|
||||
|
||||
### 操作步骤
|
||||
|
@ -68,9 +108,9 @@ python3 PPOCRLabel.py --lang ch
|
|||
5. 标记框绘制完成后,用户点击 “确认”,检测框会先被预分配一个 “待识别” 标签。
|
||||
6. 重新识别:将图片中的所有检测框绘制/调整完成后,点击 “重新识别”,PPOCR模型会对当前图片中的**所有检测框**重新识别<sup>[3]</sup>。
|
||||
7. 内容更改:双击识别结果,对不准确的识别结果进行手动更改。
|
||||
8. **确认标记**:点击 “确认”,图片状态切换为 “√”,跳转至下一张。
|
||||
8. **确认标记:点击 “确认”,图片状态切换为 “√”,跳转至下一张。**
|
||||
9. 删除:点击 “删除图像”,图片将会被删除至回收站。
|
||||
10. 保存结果:用户可以通过菜单中“文件-保存标记结果”手动保存,同时也可以点击“文件 - 自动保存标记结果”开启自动保存。手动确认过的标记将会被存放在所打开图片文件夹下的*Label.txt*中。在菜单栏点击 “文件” - "保存识别结果"后,会将此类图片的识别训练数据保存在*crop_img*文件夹下,识别标签保存在*rec_gt.txt*中<sup>[4]</sup>。
|
||||
10. 导出结果:用户可以通过菜单中“文件-导出标记结果”手动导出,同时也可以点击“文件 - 自动导出标记结果”开启自动导出。手动确认过的标记将会被存放在所打开图片文件夹下的*Label.txt*中。在菜单栏点击 “文件” - "导出识别结果"后,会将此类图片的识别训练数据保存在*crop_img*文件夹下,识别标签保存在*rec_gt.txt*中<sup>[4]</sup>。
|
||||
|
||||
### 注意
|
||||
|
||||
|
@ -84,10 +124,10 @@ python3 PPOCRLabel.py --lang ch
|
|||
|
||||
| 文件名 | 说明 |
|
||||
| :-----------: | :----------------------------------------------------------: |
|
||||
| Label.txt | 检测标签,可直接用于PPOCR检测模型训练。用户每保存5张检测结果后,程序会进行自动写入。当用户关闭应用程序或切换文件路径后同样会进行写入。 |
|
||||
| Label.txt | 检测标签,可直接用于PPOCR检测模型训练。用户每确认5张检测结果后,程序会进行自动写入。当用户关闭应用程序或切换文件路径后同样会进行写入。 |
|
||||
| fileState.txt | 图片状态标记文件,保存当前文件夹下已经被用户手动确认过的图片名称。 |
|
||||
| Cache.cach | 缓存文件,保存模型自动识别的结果。 |
|
||||
| rec_gt.txt | 识别标签。可直接用于PPOCR识别模型训练。需用户手动点击菜单栏“文件” - "保存识别结果"后产生。 |
|
||||
| rec_gt.txt | 识别标签。可直接用于PPOCR识别模型训练。需用户手动点击菜单栏“文件” - "导出识别结果"后产生。 |
|
||||
| crop_img | 识别数据。按照检测框切割后的图片。与rec_gt.txt同时产生。 |
|
||||
|
||||
## 说明
|
||||
|
@ -120,19 +160,19 @@ python3 PPOCRLabel.py --lang ch
|
|||
|
||||
- 自定义模型:用户可根据[自定义模型代码使用](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_ch/whl.md#%E8%87%AA%E5%AE%9A%E4%B9%89%E6%A8%A1%E5%9E%8B),通过修改PPOCRLabel.py中针对[PaddleOCR类的实例化](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/PPOCRLabel/PPOCRLabel.py#L110)替换成自己训练的模型。
|
||||
|
||||
### 保存方式
|
||||
### 导出标记结果
|
||||
|
||||
PPOCRLabel支持三种保存方式:
|
||||
PPOCRLabel支持三种导出方式:
|
||||
|
||||
- 自动保存:点击“文件 - 自动保存标记结果”后,用户每确认过一张图片,程序自动将标记结果写入Label.txt中。若未开启此选项,则检测到用户手动确认过5张图片后进行自动保存。
|
||||
- 手动保存:点击“文件 - 保存标记结果”手动保存标记。
|
||||
- 关闭应用程序保存
|
||||
- 自动导出:点击“文件 - 自动导出标记结果”后,用户每确认过一张图片,程序自动将标记结果写入Label.txt中。若未开启此选项,则检测到用户手动确认过5张图片后进行自动导出。
|
||||
- 手动导出:点击“文件 - 导出标记结果”手动导出标记。
|
||||
- 关闭应用程序导出
|
||||
|
||||
### 导出部分识别结果
|
||||
|
||||
针对部分难以识别的数据,通过在识别结果的复选框中**取消勾选**相应的标记,其识别结果不会被导出。
|
||||
|
||||
*注意:识别结果中的复选框状态仍需用户手动点击保存后才能保留*
|
||||
*注意:识别结果中的复选框状态仍需用户手动点击确认后才能保留*
|
||||
|
||||
### 错误提示
|
||||
- 如果同时使用whl包安装了paddleocr,其优先级大于通过paddleocr.py调用PaddleOCR类,whl包未更新时会导致程序异常。
|
||||
|
|
|
@ -23,6 +23,7 @@ except ImportError:
|
|||
|
||||
from libs.shape import Shape
|
||||
from libs.utils import distance
|
||||
import copy
|
||||
|
||||
CURSOR_DEFAULT = Qt.ArrowCursor
|
||||
CURSOR_POINT = Qt.PointingHandCursor
|
||||
|
@ -45,7 +46,7 @@ class Canvas(QWidget):
|
|||
CREATE, EDIT = list(range(2))
|
||||
_fill_drawing = False # draw shadows
|
||||
|
||||
epsilon = 11.0
|
||||
epsilon = 5.0
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super(Canvas, self).__init__(*args, **kwargs)
|
||||
|
@ -81,6 +82,7 @@ class Canvas(QWidget):
|
|||
self.fourpoint = True # ADD
|
||||
self.pointnum = 0
|
||||
self.movingShape = False
|
||||
self.selectCountShape = False
|
||||
|
||||
#initialisation for panning
|
||||
self.pan_initial_pos = QPoint()
|
||||
|
@ -702,6 +704,10 @@ class Canvas(QWidget):
|
|||
|
||||
def keyPressEvent(self, ev):
|
||||
key = ev.key()
|
||||
shapesBackup = []
|
||||
shapesBackup = copy.deepcopy(self.shapes)
|
||||
self.shapesBackups.pop()
|
||||
self.shapesBackups.append(shapesBackup)
|
||||
if key == Qt.Key_Escape and self.current:
|
||||
print('ESC press')
|
||||
self.current = None
|
||||
|
@ -709,41 +715,48 @@ class Canvas(QWidget):
|
|||
self.update()
|
||||
elif key == Qt.Key_Return and self.canCloseShape():
|
||||
self.finalise()
|
||||
elif key == Qt.Key_Left and self.selectedShape:
|
||||
elif key == Qt.Key_Left and self.selectedShapes:
|
||||
self.moveOnePixel('Left')
|
||||
elif key == Qt.Key_Right and self.selectedShape:
|
||||
elif key == Qt.Key_Right and self.selectedShapes:
|
||||
self.moveOnePixel('Right')
|
||||
elif key == Qt.Key_Up and self.selectedShape:
|
||||
elif key == Qt.Key_Up and self.selectedShapes:
|
||||
self.moveOnePixel('Up')
|
||||
elif key == Qt.Key_Down and self.selectedShape:
|
||||
elif key == Qt.Key_Down and self.selectedShapes:
|
||||
self.moveOnePixel('Down')
|
||||
|
||||
def moveOnePixel(self, direction):
|
||||
# print(self.selectedShape.points)
|
||||
if direction == 'Left' and not self.moveOutOfBound(QPointF(-1.0, 0)):
|
||||
# print("move Left one pixel")
|
||||
self.selectedShape.points[0] += QPointF(-1.0, 0)
|
||||
self.selectedShape.points[1] += QPointF(-1.0, 0)
|
||||
self.selectedShape.points[2] += QPointF(-1.0, 0)
|
||||
self.selectedShape.points[3] += QPointF(-1.0, 0)
|
||||
elif direction == 'Right' and not self.moveOutOfBound(QPointF(1.0, 0)):
|
||||
# print("move Right one pixel")
|
||||
self.selectedShape.points[0] += QPointF(1.0, 0)
|
||||
self.selectedShape.points[1] += QPointF(1.0, 0)
|
||||
self.selectedShape.points[2] += QPointF(1.0, 0)
|
||||
self.selectedShape.points[3] += QPointF(1.0, 0)
|
||||
elif direction == 'Up' and not self.moveOutOfBound(QPointF(0, -1.0)):
|
||||
# print("move Up one pixel")
|
||||
self.selectedShape.points[0] += QPointF(0, -1.0)
|
||||
self.selectedShape.points[1] += QPointF(0, -1.0)
|
||||
self.selectedShape.points[2] += QPointF(0, -1.0)
|
||||
self.selectedShape.points[3] += QPointF(0, -1.0)
|
||||
elif direction == 'Down' and not self.moveOutOfBound(QPointF(0, 1.0)):
|
||||
# print("move Down one pixel")
|
||||
self.selectedShape.points[0] += QPointF(0, 1.0)
|
||||
self.selectedShape.points[1] += QPointF(0, 1.0)
|
||||
self.selectedShape.points[2] += QPointF(0, 1.0)
|
||||
self.selectedShape.points[3] += QPointF(0, 1.0)
|
||||
self.selectCount = len(self.selectedShapes)
|
||||
self.selectCountShape = True
|
||||
for i in range(len(self.selectedShapes)):
|
||||
self.selectedShape = self.selectedShapes[i]
|
||||
if direction == 'Left' and not self.moveOutOfBound(QPointF(-1.0, 0)):
|
||||
# print("move Left one pixel")
|
||||
self.selectedShape.points[0] += QPointF(-1.0, 0)
|
||||
self.selectedShape.points[1] += QPointF(-1.0, 0)
|
||||
self.selectedShape.points[2] += QPointF(-1.0, 0)
|
||||
self.selectedShape.points[3] += QPointF(-1.0, 0)
|
||||
elif direction == 'Right' and not self.moveOutOfBound(QPointF(1.0, 0)):
|
||||
# print("move Right one pixel")
|
||||
self.selectedShape.points[0] += QPointF(1.0, 0)
|
||||
self.selectedShape.points[1] += QPointF(1.0, 0)
|
||||
self.selectedShape.points[2] += QPointF(1.0, 0)
|
||||
self.selectedShape.points[3] += QPointF(1.0, 0)
|
||||
elif direction == 'Up' and not self.moveOutOfBound(QPointF(0, -1.0)):
|
||||
# print("move Up one pixel")
|
||||
self.selectedShape.points[0] += QPointF(0, -1.0)
|
||||
self.selectedShape.points[1] += QPointF(0, -1.0)
|
||||
self.selectedShape.points[2] += QPointF(0, -1.0)
|
||||
self.selectedShape.points[3] += QPointF(0, -1.0)
|
||||
elif direction == 'Down' and not self.moveOutOfBound(QPointF(0, 1.0)):
|
||||
# print("move Down one pixel")
|
||||
self.selectedShape.points[0] += QPointF(0, 1.0)
|
||||
self.selectedShape.points[1] += QPointF(0, 1.0)
|
||||
self.selectedShape.points[2] += QPointF(0, 1.0)
|
||||
self.selectedShape.points[3] += QPointF(0, 1.0)
|
||||
shapesBackup = []
|
||||
shapesBackup = copy.deepcopy(self.shapes)
|
||||
self.shapesBackups.append(shapesBackup)
|
||||
self.shapeMoved.emit()
|
||||
self.repaint()
|
||||
|
||||
|
@ -840,6 +853,7 @@ class Canvas(QWidget):
|
|||
def restoreShape(self):
|
||||
if not self.isShapeRestorable:
|
||||
return
|
||||
|
||||
self.shapesBackups.pop() # latest
|
||||
shapesBackup = self.shapesBackups.pop()
|
||||
self.shapes = shapesBackup
|
||||
|
|
|
@ -124,6 +124,15 @@ def natural_sort(list, key=lambda s:s):
|
|||
|
||||
|
||||
def get_rotate_crop_image(img, points):
|
||||
# Use Green's theory to judge clockwise or counterclockwise
|
||||
# author: biyanhua
|
||||
d = 0.0
|
||||
for index in range(-1, 3):
|
||||
d += -0.5 * (points[index + 1][1] + points[index][1]) * (
|
||||
points[index + 1][0] - points[index][0])
|
||||
if d < 0: # counterclockwise
|
||||
tmp = np.array(points)
|
||||
points[1], points[3] = tmp[3], tmp[1]
|
||||
|
||||
try:
|
||||
img_crop_width = int(
|
||||
|
@ -165,6 +174,7 @@ def stepsInfo(lang='en'):
|
|||
"10. 标注结果:关闭应用程序或切换文件路径后,手动保存过的标签将会被存放在所打开图片文件夹下的" \
|
||||
"*Label.txt*中。在菜单栏点击 “PaddleOCR” - 保存识别结果后,会将此类图片的识别训练数据保存在*crop_img*文件夹下," \
|
||||
"识别标签保存在*rec_gt.txt*中。\n"
|
||||
|
||||
else:
|
||||
msg = "1. Build and launch using the instructions above.\n" \
|
||||
"2. Click 'Open Dir' in Menu/File to select the folder of the picture.\n"\
|
||||
|
@ -178,5 +188,57 @@ def stepsInfo(lang='en'):
|
|||
"8. Click 'Save', the image status will switch to '√',then the program automatically jump to the next.\n"\
|
||||
"9. Click 'Delete Image' and the image will be deleted to the recycle bin.\n"\
|
||||
"10. Labeling result: After closing the application or switching the file path, the manually saved label will be stored in *Label.txt* under the opened picture folder.\n"\
|
||||
" Click PaddleOCR-Save Recognition Results in the menu bar, the recognition training data of such pictures will be saved in the *crop_img* folder, and the recognition label will be saved in *rec_gt.txt*.\n"
|
||||
" Click PaddleOCR-Save Recognition Results in the menu bar, the recognition training data of such pictures will be saved in the *crop_img* folder, and the recognition label will be saved in *rec_gt.txt*.\n"
|
||||
|
||||
return msg
|
||||
|
||||
def keysInfo(lang='en'):
    """Return the keyboard-shortcut help text shown to the user.

    Args:
        lang: 'ch' selects the Chinese table; any other value (default
            'en') selects the English table.

    Returns:
        A tab-aligned, newline-separated string: one header row, a rule
        line, one row per shortcut, a closing rule line and a note for
        Mac users.
    """
    if lang == 'ch':
        # Ctrl++ zooms in (放大) and Ctrl-- zooms out (缩小); the labels were
        # previously swapped relative to the English table below.
        msg = (
            "快捷键\t\t\t说明\n"
            "———————————————————————\n"
            "Ctrl + shift + R\t\t对当前图片的所有标记重新识别\n"
            "W\t\t\t新建矩形框\n"
            "Q\t\t\t新建四点框\n"
            "Ctrl + E\t\t编辑所选框标签\n"
            "Ctrl + R\t\t重新识别所选标记\n"
            "Ctrl + C\t\t复制并粘贴选中的标记框\n"
            "Ctrl + 鼠标左键\t\t多选标记框\n"
            "Backspace\t\t删除所选框\n"
            "Ctrl + V\t\t确认本张图片标记\n"
            "Ctrl + Shift + d\t删除本张图片\n"
            "D\t\t\t下一张图片\n"
            "A\t\t\t上一张图片\n"
            "Ctrl++\t\t\t放大\n"
            "Ctrl--\t\t\t缩小\n"
            "↑→↓←\t\t\t移动标记框\n"
            "———————————————————————\n"
            "注:Mac用户Command键替换上述Ctrl键"
        )
    else:
        # Rows whose description does not fit on one line continue on the
        # next row with an empty shortcut column, followed by a blank row.
        msg = (
            "Shortcut Keys\t\tDescription\n"
            "———————————————————————\n"
            "Ctrl + shift + R\t\tRe-recognize all the labels\n"
            "\t\t\tof the current image\n"
            "\n"
            "W\t\t\tCreate a rect box\n"
            "Q\t\t\tCreate a four-points box\n"
            "Ctrl + E\t\tEdit label of the selected box\n"
            "Ctrl + R\t\tRe-recognize the selected box\n"
            "Ctrl + C\t\tCopy and paste the selected\n"
            "\t\t\tbox\n"
            "\n"
            "Ctrl + Left Mouse\tMulti select the label\n"
            "Button\t\t\tbox\n"
            "\n"
            "Backspace\t\tDelete the selected box\n"
            "Ctrl + V\t\tCheck image\n"
            "Ctrl + Shift + d\tDelete image\n"
            "D\t\t\tNext image\n"
            "A\t\t\tPrevious image\n"
            "Ctrl++\t\t\tZoom in\n"
            "Ctrl--\t\t\tZoom out\n"
            # The trailing newline keeps the closing rule on its own line.
            "↑→↓←\t\t\tMove selected box\n"
            "———————————————————————\n"
            "Notice:For Mac users, use the 'Command' key instead of the 'Ctrl' key"
        )

    return msg
|
|
@ -18,6 +18,8 @@
|
|||
<file alias="quit">resources/icons/quit.png</file>
|
||||
<file alias="copy">resources/icons/copy.png</file>
|
||||
<file alias="edit">resources/icons/edit.png</file>
|
||||
<file alias="rotateLeft">resources/icons/rotateLeft.png</file>
|
||||
<file alias="rotateRight">resources/icons/rotateRight.png</file>
|
||||
<file alias="open">resources/icons/open.png</file>
|
||||
<file alias="save">resources/icons/save.png</file>
|
||||
<file alias="format_voc">resources/icons/format_voc.png</file>
|
||||
|
|
After Width: | Height: | Size: 4.1 KiB |
After Width: | Height: | Size: 4.1 KiB |
|
@ -31,6 +31,7 @@ save=确认
|
|||
saveAs=另存为
|
||||
fitWinDetail=缩放到当前窗口大小
|
||||
openDir=打开目录
|
||||
openDatasetDir=打开数据集路径
|
||||
copyPrevBounding=复制当前图像中的上一个边界框
|
||||
showHide=显示/隐藏标签
|
||||
changeSaveFormat=更改存储格式
|
||||
|
@ -85,18 +86,22 @@ detectionBoxposition=检测框位置
|
|||
recognitionResult=识别结果
|
||||
creatPolygon=四点标注
|
||||
drawSquares=正方形标注
|
||||
saveRec=保存识别结果
|
||||
rotateLeft=图片左旋转90度
|
||||
rotateRight=图片右旋转90度
|
||||
saveRec=导出识别结果
|
||||
tempLabel=待识别
|
||||
nullLabel=无法识别
|
||||
steps=操作步骤
|
||||
keys=快捷键
|
||||
choseModelLg=选择模型语言
|
||||
cancel=取消
|
||||
ok=确认
|
||||
autolabeling=自动标注中
|
||||
hideBox=隐藏所有标注
|
||||
showBox=显示所有标注
|
||||
saveLabel=保存标记结果
|
||||
saveLabel=导出标记结果
|
||||
singleRe=重识别此区块
|
||||
labelDialogOption=弹出标记输入框
|
||||
undo=撤销
|
||||
undoLastPoint=撤销上个点
|
||||
autoSaveMode=自动保存标记结果
|
||||
autoSaveMode=自动导出标记结果
|
|
@ -3,6 +3,7 @@ openFileDetail=Open image or label file
|
|||
quit=Quit
|
||||
quitApp=Quit application
|
||||
openDir=Open Dir
|
||||
openDatasetDir=Open DatasetDir
|
||||
copyPrevBounding=Copy previous Bounding Boxes in the current image
|
||||
changeSavedAnnotationDir=Change default saved Annotation dir
|
||||
openAnnotation=Open Annotation
|
||||
|
@ -77,26 +78,30 @@ IR=Image Resize
|
|||
autoRecognition=Auto Recognition
|
||||
reRecognition=Re-recognition
|
||||
mfile=File
|
||||
medit=Eidt
|
||||
medit=Edit
|
||||
mview=View
|
||||
mhelp=Help
|
||||
iconList=Icon List
|
||||
detectionBoxposition=Detection box position
|
||||
recognitionResult=Recognition result
|
||||
creatPolygon=Create Quadrilateral
|
||||
rotateLeft=Left turn 90 degrees
|
||||
rotateRight=Right turn 90 degrees
|
||||
drawSquares=Draw Squares
|
||||
saveRec=Save Recognition Result
|
||||
saveRec=Export Recognition Result
|
||||
tempLabel=TEMPORARY
|
||||
nullLabel=NULL
|
||||
steps=Steps
|
||||
keys=Shortcut Keys
|
||||
choseModelLg=Choose Model Language
|
||||
cancel=Cancel
|
||||
ok=OK
|
||||
autolabeling=Automatic Labeling
|
||||
hideBox=Hide All Box
|
||||
showBox=Show All Box
|
||||
saveLabel=Save Label
|
||||
saveLabel=Export Label
|
||||
singleRe=Re-recognition RectBox
|
||||
labelDialogOption=Pop-up Label Input Dialog
|
||||
undo=Undo
|
||||
undoLastPoint=Undo Last Point
|
||||
autoSaveMode=Auto Save Label Mode
|
||||
autoSaveMode=Auto Export Label Mode
|
|
@ -32,7 +32,8 @@ PaddleOCR supports both dynamic graph and static graph programming paradigm
|
|||
|
||||
<div align="center">
|
||||
<img src="doc/imgs_results/ch_ppocr_mobile_v2.0/test_add_91.jpg" width="800">
|
||||
<img src="doc/imgs_results/ch_ppocr_mobile_v2.0/00018069.jpg" width="800">
|
||||
<img src="doc/imgs_results/multi_lang/img_01.jpg" width="800">
|
||||
<img src="doc/imgs_results/multi_lang/img_02.jpg" width="800">
|
||||
</div>
|
||||
|
||||
The above pictures are visualizations of the general ppocr_server model. For more result images, please see [More visualizations](./doc/doc_en/visualization_en.md).
|
||||
|
@ -42,7 +43,7 @@ The above pictures are the visualizations of the general ppocr_server model. For
|
|||
- Scan the QR code below with WeChat to join the official technical exchange group. We look forward to your participation.
|
||||
|
||||
<div align="center">
|
||||
<img src="https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.0/doc/joinus.PNG" width = "200" height = "200" />
|
||||
<img src="https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/dygraph/doc/joinus.PNG" width = "200" height = "200" />
|
||||
</div>
|
||||
|
||||
|
||||
|
@ -94,7 +95,7 @@ For a new language request, please refer to [Guideline for new language_requests
|
|||
- [Python Inference](./doc/doc_en/inference_en.md)
|
||||
- [C++ Inference](./deploy/cpp_infer/readme_en.md)
|
||||
- [Serving](./deploy/pdserving/README.md)
|
||||
- [Mobile](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/deploy/lite/readme_en.md)
|
||||
- [Mobile](./deploy/lite/readme_en.md)
|
||||
- [Benchmark](./doc/doc_en/benchmark_en.md)
|
||||
- Data Annotation and Synthesis
|
||||
- [Semi-automatic Annotation Tool: PPOCRLabel](./PPOCRLabel/README.md)
|
||||
|
|
14
README_ch.md
|
@ -8,9 +8,9 @@ PaddleOCR同时支持动态图与静态图两种编程范式
|
|||
- 静态图版本:develop分支
|
||||
|
||||
**近期更新**
|
||||
- 2021.4.8 release 2.1版本,新增AAAI 2021论文[端到端识别算法PGNet](./doc/doc_ch/pgnet.md)开源,[多语言模型](./doc/doc_ch/multi_languages.md)支持种类增加到80+。
|
||||
- 2021.2.1 [FAQ](./doc/doc_ch/FAQ.md)新增5个高频问题,总数162个,每周一都会更新,欢迎大家持续关注。
|
||||
- 2021.1.26,28,29 PaddleOCR官方研发团队带来技术深入解读三日直播课,1月26日、28日、29日晚上19:30,[直播地址](https://live.bilibili.com/21689802)
|
||||
- 2021.1.21 更新多语言识别模型,目前支持语种超过27种,[多语言模型下载](./doc/doc_ch/models_list.md),包括中文简体、中文繁体、英文、法文、德文、韩文、日文、意大利文、西班牙文、葡萄牙文、俄罗斯文、阿拉伯文等,后续计划可以参考[多语言研发计划](https://github.com/PaddlePaddle/PaddleOCR/issues/1048)
|
||||
- 2021.1.21 更新多语言识别模型,目前支持语种超过27种,包括中文简体、中文繁体、英文、法文、德文、韩文、日文、意大利文、西班牙文、葡萄牙文、俄罗斯文、阿拉伯文等,后续计划可以参考[多语言研发计划](https://github.com/PaddlePaddle/PaddleOCR/issues/1048)
|
||||
- 2020.12.15 更新数据合成工具[Style-Text](./StyleText/README_ch.md),可以批量合成大量与目标场景类似的图像,在多个场景验证,效果明显提升。
|
||||
- 2020.11.25 更新半自动标注工具[PPOCRLabel](./PPOCRLabel/README_ch.md),辅助开发者高效完成标注任务,输出格式与PP-OCR训练任务完美衔接。
|
||||
- 2020.9.22 更新PP-OCR技术文章,https://arxiv.org/abs/2009.09941
|
||||
|
@ -46,7 +46,7 @@ PaddleOCR同时支持动态图与静态图两种编程范式
|
|||
- 微信扫描二维码加入官方交流群,获得更高效的问题答疑,与各行各业开发者充分交流,期待您的加入。
|
||||
|
||||
<div align="center">
|
||||
<img src="https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.0/doc/joinus.PNG" width = "200" height = "200" />
|
||||
<img src="https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/dygraph/doc/joinus.PNG" width = "200" height = "200" />
|
||||
</div>
|
||||
|
||||
## 快速体验
|
||||
|
@ -74,11 +74,13 @@ PaddleOCR同时支持动态图与静态图两种编程范式
|
|||
## 文档教程
|
||||
- [快速安装](./doc/doc_ch/installation.md)
|
||||
- [中文OCR模型快速使用](./doc/doc_ch/quickstart.md)
|
||||
- [多语言OCR模型快速使用](./doc/doc_ch/multi_languages.md)
|
||||
- [代码组织结构](./doc/doc_ch/tree.md)
|
||||
- 算法介绍
|
||||
- [文本检测](./doc/doc_ch/algorithm_overview.md)
|
||||
- [文本识别](./doc/doc_ch/algorithm_overview.md)
|
||||
- [PP-OCR Pipline](#PP-OCR)
|
||||
- [PP-OCR Pipeline](#PP-OCR)
|
||||
- [端到端PGNet算法](./doc/doc_ch/pgnet.md)
|
||||
- 模型训练/评估
|
||||
- [文本检测](./doc/doc_ch/detection.md)
|
||||
- [文本识别](./doc/doc_ch/recognition.md)
|
||||
|
@ -89,7 +91,7 @@ PaddleOCR同时支持动态图与静态图两种编程范式
|
|||
- [基于Python脚本预测引擎推理](./doc/doc_ch/inference.md)
|
||||
- [基于C++预测引擎推理](./deploy/cpp_infer/readme.md)
|
||||
- [服务化部署](./deploy/pdserving/README_CN.md)
|
||||
- [端侧部署](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/deploy/lite/readme.md)
|
||||
- [端侧部署](./deploy/lite/readme.md)
|
||||
- [Benchmark](./doc/doc_ch/benchmark.md)
|
||||
- 数据集
|
||||
- [通用中英文OCR数据集](./doc/doc_ch/datasets.md)
|
||||
|
@ -112,7 +114,7 @@ PaddleOCR同时支持动态图与静态图两种编程范式
|
|||
|
||||
|
||||
<a name="PP-OCR"></a>
|
||||
## PP-OCR Pipline
|
||||
## PP-OCR Pipeline
|
||||
<div align="center">
|
||||
<img src="./doc/ppocr_framework.png" width="800">
|
||||
</div>
|
||||
|
|
|
@ -66,6 +66,7 @@ class StdTextDrawer(object):
|
|||
corpus_list.append(corpus[0:i])
|
||||
text_input_list.append(text_input)
|
||||
corpus = corpus[i:]
|
||||
i = 0
|
||||
break
|
||||
draw.text((char_x, 2), char_i, fill=(0, 0, 0), font=font)
|
||||
char_x += char_size
|
||||
|
@ -78,7 +79,6 @@ class StdTextDrawer(object):
|
|||
|
||||
corpus_list.append(corpus[0:i])
|
||||
text_input_list.append(text_input)
|
||||
corpus = corpus[i:]
|
||||
break
|
||||
|
||||
return corpus_list, text_input_list
|
||||
|
|
|
@ -11,7 +11,8 @@
|
|||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import paddleocr
|
||||
from .paddleocr import *
|
||||
|
||||
__all__ = ['PaddleOCR', 'draw_ocr']
|
||||
from .paddleocr import PaddleOCR
|
||||
from .tools.infer.utility import draw_ocr
|
||||
__version__ = paddleocr.VERSION
|
||||
__all__ = ['PaddleOCR', 'PPStructure', 'draw_ocr', 'draw_structure_result', 'save_structure_res','download_with_progressbar']
|
||||
|
|
|
@ -7,11 +7,6 @@ Global:
|
|||
save_epoch_step: 1200
|
||||
# evaluation is run every 2000 iterations after the 3000th iteration
|
||||
eval_batch_step: [3000, 2000]
|
||||
# 1. If pretrained_model is saved in static mode, such as classification pretrained model
|
||||
# from static branch, load_static_weights must be set as True.
|
||||
# 2. If you want to finetune the pretrained models we provide in the docs,
|
||||
# you should set load_static_weights as False.
|
||||
load_static_weights: True
|
||||
cal_metric_during_train: False
|
||||
pretrained_model: ./pretrain_models/MobileNetV3_large_x0_5_pretrained
|
||||
checkpoints:
|
||||
|
|
|
@ -7,11 +7,6 @@ Global:
|
|||
save_epoch_step: 1200
|
||||
# evaluation is run every 2000 iterations after the 3000th iteration
|
||||
eval_batch_step: [3000, 2000]
|
||||
# 1. If pretrained_model is saved in static mode, such as classification pretrained model
|
||||
# from static branch, load_static_weights must be set as True.
|
||||
# 2. If you want to finetune the pretrained models we provide in the docs,
|
||||
# you should set load_static_weights as False.
|
||||
load_static_weights: True
|
||||
cal_metric_during_train: False
|
||||
pretrained_model: ./pretrain_models/ResNet18_vd_pretrained
|
||||
checkpoints:
|
||||
|
|
|
@ -0,0 +1,202 @@
|
|||
Global:
|
||||
use_gpu: true
|
||||
epoch_num: 1200
|
||||
log_smooth_window: 20
|
||||
print_batch_step: 2
|
||||
save_model_dir: ./output/ch_db_mv3/
|
||||
save_epoch_step: 1200
|
||||
# evaluation is run every 2000 iterations after the 3000th iteration
|
||||
eval_batch_step: [3000, 2000]
|
||||
cal_metric_during_train: False
|
||||
pretrained_model: ./pretrain_models/MobileNetV3_large_x0_5_pretrained
|
||||
checkpoints:
|
||||
save_inference_dir:
|
||||
use_visualdl: False
|
||||
infer_img: doc/imgs_en/img_10.jpg
|
||||
save_res_path: ./output/det_db/predicts_db.txt
|
||||
|
||||
Architecture:
|
||||
name: DistillationModel
|
||||
algorithm: Distillation
|
||||
Models:
|
||||
Student:
|
||||
pretrained: ./pretrain_models/MobileNetV3_large_x0_5_pretrained
|
||||
freeze_params: false
|
||||
return_all_feats: false
|
||||
model_type: det
|
||||
algorithm: DB
|
||||
Backbone:
|
||||
name: MobileNetV3
|
||||
scale: 0.5
|
||||
model_name: large
|
||||
disable_se: True
|
||||
Neck:
|
||||
name: DBFPN
|
||||
out_channels: 96
|
||||
Head:
|
||||
name: DBHead
|
||||
k: 50
|
||||
Student2:
|
||||
pretrained: ./pretrain_models/MobileNetV3_large_x0_5_pretrained
|
||||
freeze_params: false
|
||||
return_all_feats: false
|
||||
model_type: det
|
||||
algorithm: DB
|
||||
Transform:
|
||||
Backbone:
|
||||
name: MobileNetV3
|
||||
scale: 0.5
|
||||
model_name: large
|
||||
disable_se: True
|
||||
Neck:
|
||||
name: DBFPN
|
||||
out_channels: 96
|
||||
Head:
|
||||
name: DBHead
|
||||
k: 50
|
||||
Teacher:
|
||||
pretrained: ./pretrain_models/ch_ppocr_server_v2.0_det_train/best_accuracy
|
||||
freeze_params: true
|
||||
return_all_feats: false
|
||||
model_type: det
|
||||
algorithm: DB
|
||||
Transform:
|
||||
Backbone:
|
||||
name: ResNet
|
||||
layers: 18
|
||||
Neck:
|
||||
name: DBFPN
|
||||
out_channels: 256
|
||||
Head:
|
||||
name: DBHead
|
||||
k: 50
|
||||
|
||||
Loss:
|
||||
name: CombinedLoss
|
||||
loss_config_list:
|
||||
- DistillationDilaDBLoss:
|
||||
weight: 1.0
|
||||
model_name_pairs:
|
||||
- ["Student", "Teacher"]
|
||||
- ["Student2", "Teacher"]
|
||||
key: maps
|
||||
balance_loss: true
|
||||
main_loss_type: DiceLoss
|
||||
alpha: 5
|
||||
beta: 10
|
||||
ohem_ratio: 3
|
||||
- DistillationDMLLoss:
|
||||
model_name_pairs:
|
||||
- ["Student", "Student2"]
|
||||
maps_name: "thrink_maps"
|
||||
weight: 1.0
|
||||
# act: None
|
||||
model_name_pairs: ["Student", "Student2"]
|
||||
key: maps
|
||||
- DistillationDBLoss:
|
||||
weight: 1.0
|
||||
model_name_list: ["Student", "Student2"]
|
||||
# key: maps
|
||||
# name: DBLoss
|
||||
balance_loss: true
|
||||
main_loss_type: DiceLoss
|
||||
alpha: 5
|
||||
beta: 10
|
||||
ohem_ratio: 3
|
||||
|
||||
|
||||
Optimizer:
|
||||
name: Adam
|
||||
beta1: 0.9
|
||||
beta2: 0.999
|
||||
lr:
|
||||
name: Cosine
|
||||
learning_rate: 0.001
|
||||
warmup_epoch: 2
|
||||
regularizer:
|
||||
name: 'L2'
|
||||
factor: 0
|
||||
|
||||
PostProcess:
|
||||
name: DistillationDBPostProcess
|
||||
model_name: ["Student", "Student2", "Teacher"]
|
||||
# key: maps
|
||||
thresh: 0.3
|
||||
box_thresh: 0.6
|
||||
max_candidates: 1000
|
||||
unclip_ratio: 1.5
|
||||
|
||||
Metric:
|
||||
name: DistillationMetric
|
||||
base_metric_name: DetMetric
|
||||
main_indicator: hmean
|
||||
key: "Student"
|
||||
|
||||
Train:
|
||||
dataset:
|
||||
name: SimpleDataSet
|
||||
data_dir: ./train_data/icdar2015/text_localization/
|
||||
label_file_list:
|
||||
- ./train_data/icdar2015/text_localization/train_icdar2015_label.txt
|
||||
ratio_list: [1.0]
|
||||
transforms:
|
||||
- DecodeImage: # load image
|
||||
img_mode: BGR
|
||||
channel_first: False
|
||||
- DetLabelEncode: # Class handling label
|
||||
- IaaAugment:
|
||||
augmenter_args:
|
||||
- { 'type': Fliplr, 'args': { 'p': 0.5 } }
|
||||
- { 'type': Affine, 'args': { 'rotate': [-10, 10] } }
|
||||
- { 'type': Resize, 'args': { 'size': [0.5, 3] } }
|
||||
- EastRandomCropData:
|
||||
size: [960, 960]
|
||||
max_tries: 50
|
||||
keep_ratio: true
|
||||
- MakeBorderMap:
|
||||
shrink_ratio: 0.4
|
||||
thresh_min: 0.3
|
||||
thresh_max: 0.7
|
||||
- MakeShrinkMap:
|
||||
shrink_ratio: 0.4
|
||||
min_text_size: 8
|
||||
- NormalizeImage:
|
||||
scale: 1./255.
|
||||
mean: [0.485, 0.456, 0.406]
|
||||
std: [0.229, 0.224, 0.225]
|
||||
order: 'hwc'
|
||||
- ToCHWImage:
|
||||
- KeepKeys:
|
||||
keep_keys: ['image', 'threshold_map', 'threshold_mask', 'shrink_map', 'shrink_mask'] # the order of the dataloader list
|
||||
loader:
|
||||
shuffle: True
|
||||
drop_last: False
|
||||
batch_size_per_card: 8
|
||||
num_workers: 4
|
||||
|
||||
Eval:
|
||||
dataset:
|
||||
name: SimpleDataSet
|
||||
data_dir: ./train_data/icdar2015/text_localization/
|
||||
label_file_list:
|
||||
- ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
|
||||
transforms:
|
||||
- DecodeImage: # load image
|
||||
img_mode: BGR
|
||||
channel_first: False
|
||||
- DetLabelEncode: # Class handling label
|
||||
- DetResizeForTest:
|
||||
# image_shape: [736, 1280]
|
||||
- NormalizeImage:
|
||||
scale: 1./255.
|
||||
mean: [0.485, 0.456, 0.406]
|
||||
std: [0.229, 0.224, 0.225]
|
||||
order: 'hwc'
|
||||
- ToCHWImage:
|
||||
- KeepKeys:
|
||||
keep_keys: ['image', 'shape', 'polys', 'ignore_tags']
|
||||
loader:
|
||||
shuffle: False
|
||||
drop_last: False
|
||||
batch_size_per_card: 1 # must be 1
|
||||
num_workers: 2
|
|
@ -0,0 +1,174 @@
|
|||
Global:
|
||||
use_gpu: true
|
||||
epoch_num: 1200
|
||||
log_smooth_window: 20
|
||||
print_batch_step: 2
|
||||
save_model_dir: ./output/ch_db_mv3/
|
||||
save_epoch_step: 1200
|
||||
# evaluation is run every 2000 iterations after the 3000th iteration
|
||||
eval_batch_step: [3000, 2000]
|
||||
cal_metric_during_train: False
|
||||
pretrained_model: ./pretrain_models/MobileNetV3_large_x0_5_pretrained
|
||||
checkpoints:
|
||||
save_inference_dir:
|
||||
use_visualdl: False
|
||||
infer_img: doc/imgs_en/img_10.jpg
|
||||
save_res_path: ./output/det_db/predicts_db.txt
|
||||
|
||||
Architecture:
|
||||
name: DistillationModel
|
||||
algorithm: Distillation
|
||||
Models:
|
||||
Student:
|
||||
pretrained: ./pretrain_models/MobileNetV3_large_x0_5_pretrained
|
||||
freeze_params: false
|
||||
return_all_feats: false
|
||||
model_type: det
|
||||
algorithm: DB
|
||||
Backbone:
|
||||
name: MobileNetV3
|
||||
scale: 0.5
|
||||
model_name: large
|
||||
disable_se: True
|
||||
Neck:
|
||||
name: DBFPN
|
||||
out_channels: 96
|
||||
Head:
|
||||
name: DBHead
|
||||
k: 50
|
||||
Teacher:
|
||||
pretrained: ./pretrain_models/ch_ppocr_server_v2.0_det_train/best_accuracy
|
||||
freeze_params: true
|
||||
return_all_feats: false
|
||||
model_type: det
|
||||
algorithm: DB
|
||||
Transform:
|
||||
Backbone:
|
||||
name: ResNet
|
||||
layers: 18
|
||||
Neck:
|
||||
name: DBFPN
|
||||
out_channels: 256
|
||||
Head:
|
||||
name: DBHead
|
||||
k: 50
|
||||
|
||||
Loss:
|
||||
name: CombinedLoss
|
||||
loss_config_list:
|
||||
- DistillationDilaDBLoss:
|
||||
weight: 1.0
|
||||
model_name_pairs:
|
||||
- ["Student", "Teacher"]
|
||||
key: maps
|
||||
balance_loss: true
|
||||
main_loss_type: DiceLoss
|
||||
alpha: 5
|
||||
beta: 10
|
||||
ohem_ratio: 3
|
||||
- DistillationDBLoss:
|
||||
weight: 1.0
|
||||
model_name_list: ["Student", "Teacher"]
|
||||
# key: maps
|
||||
name: DBLoss
|
||||
balance_loss: true
|
||||
main_loss_type: DiceLoss
|
||||
alpha: 5
|
||||
beta: 10
|
||||
ohem_ratio: 3
|
||||
|
||||
Optimizer:
|
||||
name: Adam
|
||||
beta1: 0.9
|
||||
beta2: 0.999
|
||||
lr:
|
||||
name: Cosine
|
||||
learning_rate: 0.001
|
||||
warmup_epoch: 2
|
||||
regularizer:
|
||||
name: 'L2'
|
||||
factor: 0
|
||||
|
||||
PostProcess:
|
||||
name: DistillationDBPostProcess
|
||||
model_name: ["Student", "Student2"]
|
||||
key: head_out
|
||||
thresh: 0.3
|
||||
box_thresh: 0.6
|
||||
max_candidates: 1000
|
||||
unclip_ratio: 1.5
|
||||
|
||||
Metric:
|
||||
name: DistillationMetric
|
||||
base_metric_name: DetMetric
|
||||
main_indicator: hmean
|
||||
key: "Student"
|
||||
|
||||
Train:
|
||||
dataset:
|
||||
name: SimpleDataSet
|
||||
data_dir: ./train_data/icdar2015/text_localization/
|
||||
label_file_list:
|
||||
- ./train_data/icdar2015/text_localization/train_icdar2015_label.txt
|
||||
ratio_list: [1.0]
|
||||
transforms:
|
||||
- DecodeImage: # load image
|
||||
img_mode: BGR
|
||||
channel_first: False
|
||||
- DetLabelEncode: # Class handling label
|
||||
- IaaAugment:
|
||||
augmenter_args:
|
||||
- { 'type': Fliplr, 'args': { 'p': 0.5 } }
|
||||
- { 'type': Affine, 'args': { 'rotate': [-10, 10] } }
|
||||
- { 'type': Resize, 'args': { 'size': [0.5, 3] } }
|
||||
- EastRandomCropData:
|
||||
size: [960, 960]
|
||||
max_tries: 50
|
||||
keep_ratio: true
|
||||
- MakeBorderMap:
|
||||
shrink_ratio: 0.4
|
||||
thresh_min: 0.3
|
||||
thresh_max: 0.7
|
||||
- MakeShrinkMap:
|
||||
shrink_ratio: 0.4
|
||||
min_text_size: 8
|
||||
- NormalizeImage:
|
||||
scale: 1./255.
|
||||
mean: [0.485, 0.456, 0.406]
|
||||
std: [0.229, 0.224, 0.225]
|
||||
order: 'hwc'
|
||||
- ToCHWImage:
|
||||
- KeepKeys:
|
||||
keep_keys: ['image', 'threshold_map', 'threshold_mask', 'shrink_map', 'shrink_mask'] # the order of the dataloader list
|
||||
loader:
|
||||
shuffle: True
|
||||
drop_last: False
|
||||
batch_size_per_card: 8
|
||||
num_workers: 4
|
||||
|
||||
Eval:
|
||||
dataset:
|
||||
name: SimpleDataSet
|
||||
data_dir: ./train_data/icdar2015/text_localization/
|
||||
label_file_list:
|
||||
- ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
|
||||
transforms:
|
||||
- DecodeImage: # load image
|
||||
img_mode: BGR
|
||||
channel_first: False
|
||||
- DetLabelEncode: # Class handling label
|
||||
- DetResizeForTest:
|
||||
# image_shape: [736, 1280]
|
||||
- NormalizeImage:
|
||||
scale: 1./255.
|
||||
mean: [0.485, 0.456, 0.406]
|
||||
std: [0.229, 0.224, 0.225]
|
||||
order: 'hwc'
|
||||
- ToCHWImage:
|
||||
- KeepKeys:
|
||||
keep_keys: ['image', 'shape', 'polys', 'ignore_tags']
|
||||
loader:
|
||||
shuffle: False
|
||||
drop_last: False
|
||||
batch_size_per_card: 1 # must be 1
|
||||
num_workers: 2
|
|
@ -0,0 +1,176 @@
|
|||
Global:
|
||||
use_gpu: true
|
||||
epoch_num: 1200
|
||||
log_smooth_window: 20
|
||||
print_batch_step: 2
|
||||
save_model_dir: ./output/ch_db_mv3/
|
||||
save_epoch_step: 1200
|
||||
# evaluation is run every 2000 iterations after the 3000th iteration
|
||||
eval_batch_step: [3000, 2000]
|
||||
cal_metric_during_train: False
|
||||
pretrained_model: ./pretrain_models/MobileNetV3_large_x0_5_pretrained
|
||||
checkpoints:
|
||||
save_inference_dir:
|
||||
use_visualdl: False
|
||||
infer_img: doc/imgs_en/img_10.jpg
|
||||
save_res_path: ./output/det_db/predicts_db.txt
|
||||
|
||||
Architecture:
|
||||
name: DistillationModel
|
||||
algorithm: Distillation
|
||||
Models:
|
||||
Student:
|
||||
pretrained: ./pretrain_models/MobileNetV3_large_x0_5_pretrained
|
||||
freeze_params: false
|
||||
return_all_feats: false
|
||||
model_type: det
|
||||
algorithm: DB
|
||||
Backbone:
|
||||
name: MobileNetV3
|
||||
scale: 0.5
|
||||
model_name: large
|
||||
disable_se: True
|
||||
Neck:
|
||||
name: DBFPN
|
||||
out_channels: 96
|
||||
Head:
|
||||
name: DBHead
|
||||
k: 50
|
||||
Student2:
|
||||
pretrained: ./pretrain_models/MobileNetV3_large_x0_5_pretrained
|
||||
freeze_params: false
|
||||
return_all_feats: false
|
||||
model_type: det
|
||||
algorithm: DB
|
||||
Transform:
|
||||
Backbone:
|
||||
name: MobileNetV3
|
||||
scale: 0.5
|
||||
model_name: large
|
||||
disable_se: True
|
||||
Neck:
|
||||
name: DBFPN
|
||||
out_channels: 96
|
||||
Head:
|
||||
name: DBHead
|
||||
k: 50
|
||||
|
||||
|
||||
Loss:
|
||||
name: CombinedLoss
|
||||
loss_config_list:
|
||||
- DistillationDMLLoss:
|
||||
model_name_pairs:
|
||||
- ["Student", "Student2"]
|
||||
maps_name: "thrink_maps"
|
||||
weight: 1.0
|
||||
act: "softmax"
|
||||
model_name_pairs: ["Student", "Student2"]
|
||||
key: maps
|
||||
- DistillationDBLoss:
|
||||
weight: 1.0
|
||||
model_name_list: ["Student", "Student2"]
|
||||
# key: maps
|
||||
name: DBLoss
|
||||
balance_loss: true
|
||||
main_loss_type: DiceLoss
|
||||
alpha: 5
|
||||
beta: 10
|
||||
ohem_ratio: 3
|
||||
|
||||
|
||||
Optimizer:
|
||||
name: Adam
|
||||
beta1: 0.9
|
||||
beta2: 0.999
|
||||
lr:
|
||||
name: Cosine
|
||||
learning_rate: 0.001
|
||||
warmup_epoch: 2
|
||||
regularizer:
|
||||
name: 'L2'
|
||||
factor: 0
|
||||
|
||||
PostProcess:
|
||||
name: DistillationDBPostProcess
|
||||
model_name: ["Student", "Student2"]
|
||||
key: head_out
|
||||
thresh: 0.3
|
||||
box_thresh: 0.6
|
||||
max_candidates: 1000
|
||||
unclip_ratio: 1.5
|
||||
|
||||
Metric:
|
||||
name: DistillationMetric
|
||||
base_metric_name: DetMetric
|
||||
main_indicator: hmean
|
||||
key: "Student"
|
||||
|
||||
Train:
|
||||
dataset:
|
||||
name: SimpleDataSet
|
||||
data_dir: ./train_data/icdar2015/text_localization/
|
||||
label_file_list:
|
||||
- ./train_data/icdar2015/text_localization/train_icdar2015_label.txt
|
||||
ratio_list: [1.0]
|
||||
transforms:
|
||||
- DecodeImage: # load image
|
||||
img_mode: BGR
|
||||
channel_first: False
|
||||
- DetLabelEncode: # Class handling label
|
||||
- IaaAugment:
|
||||
augmenter_args:
|
||||
- { 'type': Fliplr, 'args': { 'p': 0.5 } }
|
||||
- { 'type': Affine, 'args': { 'rotate': [-10, 10] } }
|
||||
- { 'type': Resize, 'args': { 'size': [0.5, 3] } }
|
||||
- EastRandomCropData:
|
||||
size: [960, 960]
|
||||
max_tries: 50
|
||||
keep_ratio: true
|
||||
- MakeBorderMap:
|
||||
shrink_ratio: 0.4
|
||||
thresh_min: 0.3
|
||||
thresh_max: 0.7
|
||||
- MakeShrinkMap:
|
||||
shrink_ratio: 0.4
|
||||
min_text_size: 8
|
||||
- NormalizeImage:
|
||||
scale: 1./255.
|
||||
mean: [0.485, 0.456, 0.406]
|
||||
std: [0.229, 0.224, 0.225]
|
||||
order: 'hwc'
|
||||
- ToCHWImage:
|
||||
- KeepKeys:
|
||||
keep_keys: ['image', 'threshold_map', 'threshold_mask', 'shrink_map', 'shrink_mask'] # the order of the dataloader list
|
||||
loader:
|
||||
shuffle: True
|
||||
drop_last: False
|
||||
batch_size_per_card: 8
|
||||
num_workers: 4
|
||||
|
||||
Eval:
|
||||
dataset:
|
||||
name: SimpleDataSet
|
||||
data_dir: ./train_data/icdar2015/text_localization/
|
||||
label_file_list:
|
||||
- ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
|
||||
transforms:
|
||||
- DecodeImage: # load image
|
||||
img_mode: BGR
|
||||
channel_first: False
|
||||
- DetLabelEncode: # Class handling label
|
||||
- DetResizeForTest:
|
||||
# image_shape: [736, 1280]
|
||||
- NormalizeImage:
|
||||
scale: 1./255.
|
||||
mean: [0.485, 0.456, 0.406]
|
||||
std: [0.229, 0.224, 0.225]
|
||||
order: 'hwc'
|
||||
- ToCHWImage:
|
||||
- KeepKeys:
|
||||
keep_keys: ['image', 'shape', 'polys', 'ignore_tags']
|
||||
loader:
|
||||
shuffle: False
|
||||
drop_last: False
|
||||
batch_size_per_card: 1 # must be 1
|
||||
num_workers: 2
|
|
@ -7,11 +7,6 @@ Global:
|
|||
save_epoch_step: 1200
|
||||
# evaluation is run every 2000 iterations
|
||||
eval_batch_step: [0, 2000]
|
||||
# 1. If pretrained_model is saved in static mode, such as classification pretrained model
|
||||
# from static branch, load_static_weights must be set as True.
|
||||
# 2. If you want to finetune the pretrained models we provide in the docs,
|
||||
# you should set load_static_weights as False.
|
||||
load_static_weights: True
|
||||
cal_metric_during_train: False
|
||||
pretrained_model: ./pretrain_models/MobileNetV3_large_x0_5_pretrained
|
||||
checkpoints:
|
||||
|
|
|
@ -7,11 +7,6 @@ Global:
|
|||
save_epoch_step: 1000
|
||||
# evaluation is run every 5000 iterations after the 4000th iteration
|
||||
eval_batch_step: [4000, 5000]
|
||||
# 1. If pretrained_model is saved in static mode, such as classification pretrained model
|
||||
# from static branch, load_static_weights must be set as True.
|
||||
# 2. If you want to finetune the pretrained models we provide in the docs,
|
||||
# you should set load_static_weights as False.
|
||||
load_static_weights: True
|
||||
cal_metric_during_train: False
|
||||
pretrained_model: ./pretrain_models/MobileNetV3_large_x0_5_pretrained
|
||||
checkpoints:
|
||||
|
|
|
@ -7,11 +7,6 @@ Global:
|
|||
save_epoch_step: 1200
|
||||
# evaluation is run every 2000 iterations
|
||||
eval_batch_step: [0,2000]
|
||||
# 1. If pretrained_model is saved in static mode, such as classification pretrained model
|
||||
# from static branch, load_static_weights must be set as True.
|
||||
# 2. If you want to finetune the pretrained models we provide in the docs,
|
||||
# you should set load_static_weights as False.
|
||||
load_static_weights: True
|
||||
cal_metric_during_train: False
|
||||
pretrained_model: ./pretrain_models/ResNet50_vd_ssld_pretrained
|
||||
checkpoints:
|
||||
|
|
|
@ -7,11 +7,6 @@ Global:
|
|||
save_epoch_step: 1000
|
||||
# evaluation is run every 5000 iterations after the 4000th iteration
|
||||
eval_batch_step: [4000, 5000]
|
||||
# 1. If pretrained_model is saved in static mode, such as classification pretrained model
|
||||
# from static branch, load_static_weights must be set as True.
|
||||
# 2. If you want to finetune the pretrained models we provide in the docs,
|
||||
# you should set load_static_weights as False.
|
||||
load_static_weights: True
|
||||
cal_metric_during_train: False
|
||||
pretrained_model: ./pretrain_models/ResNet50_vd_pretrained/
|
||||
checkpoints:
|
||||
|
|
|
@ -7,19 +7,15 @@ Global:
|
|||
save_epoch_step: 1000
|
||||
# evaluation is run every 5000 iterations after the 4000th iteration
|
||||
eval_batch_step: [4000, 5000]
|
||||
# 1. If pretrained_model is saved in static mode, such as classification pretrained model
|
||||
# from static branch, load_static_weights must be set as True.
|
||||
# 2. If you want to finetune the pretrained models we provide in the docs,
|
||||
# you should set load_static_weights as False.
|
||||
load_static_weights: True
|
||||
cal_metric_during_train: False
|
||||
pretrained_model: ./pretrain_models/ResNet50_vd_ssld_pretrained/
|
||||
checkpoints:
|
||||
checkpoints:
|
||||
save_inference_dir:
|
||||
use_visualdl: False
|
||||
infer_img:
|
||||
infer_img:
|
||||
save_res_path: ./output/sast_r50_vd_ic15/predicts_sast.txt
|
||||
|
||||
|
||||
Architecture:
|
||||
model_type: det
|
||||
algorithm: SAST
|
||||
|
|
|
@ -7,11 +7,6 @@ Global:
|
|||
save_epoch_step: 1000
|
||||
# evaluation is run every 5000 iterations after the 4000th iteration
|
||||
eval_batch_step: [4000, 5000]
|
||||
# 1. If pretrained_model is saved in static mode, such as classification pretrained model
|
||||
# from static branch, load_static_weights must be set as True.
|
||||
# 2. If you want to finetune the pretrained models we provide in the docs,
|
||||
# you should set load_static_weights as False.
|
||||
load_static_weights: True
|
||||
cal_metric_during_train: False
|
||||
pretrained_model: ./pretrain_models/ResNet50_vd_ssld_pretrained/
|
||||
checkpoints:
|
||||
|
|
|
@ -0,0 +1,114 @@
|
|||
Global:
|
||||
use_gpu: True
|
||||
epoch_num: 600
|
||||
log_smooth_window: 20
|
||||
print_batch_step: 10
|
||||
save_model_dir: ./output/pgnet_r50_vd_totaltext/
|
||||
save_epoch_step: 10
|
||||
# evaluation is run every 1000 iterations, starting from iteration 0
|
||||
eval_batch_step: [ 0, 1000 ]
|
||||
cal_metric_during_train: False
|
||||
pretrained_model:
|
||||
checkpoints:
|
||||
save_inference_dir:
|
||||
use_visualdl: False
|
||||
infer_img:
|
||||
valid_set: totaltext # two modes: totaltext evaluates curved words, partvgg evaluates non-curved words
|
||||
save_res_path: ./output/pgnet_r50_vd_totaltext/predicts_pgnet.txt
|
||||
character_dict_path: ppocr/utils/ic15_dict.txt
|
||||
character_type: EN
|
||||
max_text_length: 50 # the max length in seq
|
||||
max_text_nums: 30 # the max seq nums in a pic
|
||||
tcl_len: 64
|
||||
|
||||
Architecture:
|
||||
model_type: e2e
|
||||
algorithm: PGNet
|
||||
Transform:
|
||||
Backbone:
|
||||
name: ResNet
|
||||
layers: 50
|
||||
Neck:
|
||||
name: PGFPN
|
||||
Head:
|
||||
name: PGHead
|
||||
|
||||
Loss:
|
||||
name: PGLoss
|
||||
tcl_bs: 64
|
||||
max_text_length: 50 # the same as Global: max_text_length
|
||||
max_text_nums: 30 # the same as Global:max_text_nums
|
||||
pad_num: 36 # the length of dict for pad
|
||||
|
||||
Optimizer:
|
||||
name: Adam
|
||||
beta1: 0.9
|
||||
beta2: 0.999
|
||||
lr:
|
||||
learning_rate: 0.001
|
||||
regularizer:
|
||||
name: 'L2'
|
||||
factor: 0
|
||||
|
||||
|
||||
PostProcess:
|
||||
name: PGPostProcess
|
||||
score_thresh: 0.5
|
||||
mode: fast # two modes: fast or slow
|
||||
|
||||
Metric:
|
||||
name: E2EMetric
|
||||
mode: A # two ways for eval, A: label from txt, B: label from gt_mat
|
||||
gt_mat_dir: ./train_data/total_text/gt # the dir of gt_mat
|
||||
character_dict_path: ppocr/utils/ic15_dict.txt
|
||||
main_indicator: f_score_e2e
|
||||
|
||||
Train:
|
||||
dataset:
|
||||
name: PGDataSet
|
||||
data_dir: ./train_data/total_text/train
|
||||
label_file_list: [./train_data/total_text/train/train.txt]
|
||||
ratio_list: [1.0]
|
||||
transforms:
|
||||
- DecodeImage: # load image
|
||||
img_mode: BGR
|
||||
channel_first: False
|
||||
- E2ELabelEncodeTrain:
|
||||
- PGProcessTrain:
|
||||
batch_size: 14 # same as loader: batch_size_per_card
|
||||
min_crop_size: 24
|
||||
min_text_size: 4
|
||||
max_text_size: 512
|
||||
- KeepKeys:
|
||||
keep_keys: [ 'images', 'tcl_maps', 'tcl_label_maps', 'border_maps','direction_maps', 'training_masks', 'label_list', 'pos_list', 'pos_mask' ] # dataloader will return list in this order
|
||||
loader:
|
||||
shuffle: True
|
||||
drop_last: True
|
||||
batch_size_per_card: 14
|
||||
num_workers: 16
|
||||
|
||||
Eval:
|
||||
dataset:
|
||||
name: PGDataSet
|
||||
data_dir: ./train_data/total_text/test
|
||||
label_file_list: [./train_data/total_text/test/test.txt]
|
||||
transforms:
|
||||
- DecodeImage: # load image
|
||||
img_mode: RGB
|
||||
channel_first: False
|
||||
- E2ELabelEncodeTest:
|
||||
- E2EResizeForTest:
|
||||
max_side_len: 768
|
||||
- NormalizeImage:
|
||||
scale: 1./255.
|
||||
mean: [ 0.485, 0.456, 0.406 ]
|
||||
std: [ 0.229, 0.224, 0.225 ]
|
||||
order: 'hwc'
|
||||
- ToCHWImage:
|
||||
- KeepKeys:
|
||||
keep_keys: [ 'image', 'shape', 'polys', 'texts', 'ignore_tags', 'img_id']
|
||||
loader:
|
||||
shuffle: False
|
||||
drop_last: False
|
||||
batch_size_per_card: 1 # must be 1
|
||||
num_workers: 2
|
|
@ -19,6 +19,7 @@ Global:
|
|||
max_text_length: 25
|
||||
infer_mode: False
|
||||
use_space_char: True
|
||||
save_res_path: ./output/rec/predicts_chinese_common_v2.0.txt
|
||||
|
||||
|
||||
Optimizer:
|
||||
|
|
|
@ -19,6 +19,7 @@ Global:
|
|||
max_text_length: 25
|
||||
infer_mode: False
|
||||
use_space_char: True
|
||||
save_res_path: ./output/rec/predicts_chinese_lite_v2.0.txt
|
||||
|
||||
|
||||
Optimizer:
|
||||
|
|
|
@ -0,0 +1,159 @@
|
|||
Global:
|
||||
debug: false
|
||||
use_gpu: true
|
||||
epoch_num: 800
|
||||
log_smooth_window: 20
|
||||
print_batch_step: 10
|
||||
save_model_dir: ./output/rec_chinese_lite_distillation_v2.1
|
||||
save_epoch_step: 3
|
||||
eval_batch_step: [0, 2000]
|
||||
cal_metric_during_train: true
|
||||
pretrained_model:
|
||||
checkpoints:
|
||||
save_inference_dir:
|
||||
use_visualdl: false
|
||||
infer_img: doc/imgs_words/ch/word_1.jpg
|
||||
character_dict_path: ppocr/utils/ppocr_keys_v1.txt
|
||||
character_type: ch
|
||||
max_text_length: 25
|
||||
infer_mode: false
|
||||
use_space_char: true
|
||||
distributed: true
|
||||
save_res_path: ./output/rec/predicts_chinese_lite_distillation_v2.1.txt
|
||||
|
||||
|
||||
Optimizer:
|
||||
name: Adam
|
||||
beta1: 0.9
|
||||
beta2: 0.999
|
||||
lr:
|
||||
name: Piecewise
|
||||
decay_epochs : [700, 800]
|
||||
values : [0.001, 0.0001]
|
||||
warmup_epoch: 5
|
||||
regularizer:
|
||||
name: L2
|
||||
factor: 2.0e-05
|
||||
|
||||
Architecture:
|
||||
model_type: &model_type "rec"
|
||||
name: DistillationModel
|
||||
algorithm: Distillation
|
||||
Models:
|
||||
Teacher:
|
||||
pretrained:
|
||||
freeze_params: false
|
||||
return_all_feats: true
|
||||
model_type: *model_type
|
||||
algorithm: CRNN
|
||||
Transform:
|
||||
Backbone:
|
||||
name: MobileNetV1Enhance
|
||||
scale: 0.5
|
||||
Neck:
|
||||
name: SequenceEncoder
|
||||
encoder_type: rnn
|
||||
hidden_size: 64
|
||||
Head:
|
||||
name: CTCHead
|
||||
mid_channels: 96
|
||||
fc_decay: 0.00002
|
||||
Student:
|
||||
pretrained:
|
||||
freeze_params: false
|
||||
return_all_feats: true
|
||||
model_type: *model_type
|
||||
algorithm: CRNN
|
||||
Transform:
|
||||
Backbone:
|
||||
name: MobileNetV1Enhance
|
||||
scale: 0.5
|
||||
Neck:
|
||||
name: SequenceEncoder
|
||||
encoder_type: rnn
|
||||
hidden_size: 64
|
||||
Head:
|
||||
name: CTCHead
|
||||
mid_channels: 96
|
||||
fc_decay: 0.00002
|
||||
|
||||
|
||||
Loss:
|
||||
name: CombinedLoss
|
||||
loss_config_list:
|
||||
- DistillationCTCLoss:
|
||||
weight: 1.0
|
||||
model_name_list: ["Student", "Teacher"]
|
||||
key: head_out
|
||||
- DistillationDMLLoss:
|
||||
weight: 1.0
|
||||
act: "softmax"
|
||||
model_name_pairs:
|
||||
- ["Student", "Teacher"]
|
||||
key: head_out
|
||||
- DistillationDistanceLoss:
|
||||
weight: 1.0
|
||||
mode: "l2"
|
||||
model_name_pairs:
|
||||
- ["Student", "Teacher"]
|
||||
key: backbone_out
|
||||
|
||||
PostProcess:
|
||||
name: DistillationCTCLabelDecode
|
||||
model_name: ["Student", "Teacher"]
|
||||
key: head_out
|
||||
|
||||
Metric:
|
||||
name: DistillationMetric
|
||||
base_metric_name: RecMetric
|
||||
main_indicator: acc
|
||||
key: "Student"
|
||||
|
||||
Train:
|
||||
dataset:
|
||||
name: SimpleDataSet
|
||||
data_dir: ./train_data/
|
||||
label_file_list:
|
||||
- ./train_data/train_list.txt
|
||||
transforms:
|
||||
- DecodeImage:
|
||||
img_mode: BGR
|
||||
channel_first: false
|
||||
- RecAug:
|
||||
- CTCLabelEncode:
|
||||
- RecResizeImg:
|
||||
image_shape: [3, 32, 320]
|
||||
- KeepKeys:
|
||||
keep_keys:
|
||||
- image
|
||||
- label
|
||||
- length
|
||||
loader:
|
||||
shuffle: true
|
||||
batch_size_per_card: 128
|
||||
drop_last: true
|
||||
num_sections: 1
|
||||
num_workers: 8
|
||||
Eval:
|
||||
dataset:
|
||||
name: SimpleDataSet
|
||||
data_dir: ./train_data
|
||||
label_file_list:
|
||||
- ./train_data/val_list.txt
|
||||
transforms:
|
||||
- DecodeImage:
|
||||
img_mode: BGR
|
||||
channel_first: false
|
||||
- CTCLabelEncode:
|
||||
- RecResizeImg:
|
||||
image_shape: [3, 32, 320]
|
||||
- KeepKeys:
|
||||
keep_keys:
|
||||
- image
|
||||
- label
|
||||
- length
|
||||
loader:
|
||||
shuffle: false
|
||||
drop_last: false
|
||||
batch_size_per_card: 128
|
||||
num_workers: 8
|
|
@ -19,21 +19,56 @@ import logging
|
|||
logging.basicConfig(level=logging.INFO)
|
||||
|
||||
support_list = {
|
||||
'it':'italian', 'xi':'spanish', 'pu':'portuguese', 'ru':'russian', 'ar':'arabic',
|
||||
'ta':'tamil', 'ug':'uyghur', 'fa':'persian', 'ur':'urdu', 'rs':'serbian latin',
|
||||
'oc':'occitan', 'rsc':'serbian cyrillic', 'bg':'bulgarian', 'uk':'ukranian', 'be':'belarusian',
|
||||
'te':'telugu', 'ka':'kannada', 'chinese_cht':'chinese tradition','hi':'hindi','mr':'marathi',
|
||||
'ne':'nepali',
|
||||
'it': 'italian',
|
||||
'xi': 'spanish',
|
||||
'pu': 'portuguese',
|
||||
'ru': 'russian',
|
||||
'ar': 'arabic',
|
||||
'ta': 'tamil',
|
||||
'ug': 'uyghur',
|
||||
'fa': 'persian',
|
||||
'ur': 'urdu',
|
||||
'rs': 'serbian latin',
|
||||
'oc': 'occitan',
|
||||
'rsc': 'serbian cyrillic',
|
||||
'bg': 'bulgarian',
|
||||
'uk': 'ukranian',
|
||||
'be': 'belarusian',
|
||||
'te': 'telugu',
|
||||
'ka': 'kannada',
|
||||
'chinese_cht': 'chinese tradition',
|
||||
'hi': 'hindi',
|
||||
'mr': 'marathi',
|
||||
'ne': 'nepali',
|
||||
}
|
||||
assert(
|
||||
os.path.isfile("./rec_multi_language_lite_train.yml")
|
||||
),"Loss basic configuration file rec_multi_language_lite_train.yml.\
|
||||
|
||||
latin_lang = [
|
||||
'af', 'az', 'bs', 'cs', 'cy', 'da', 'de', 'es', 'et', 'fr', 'ga', 'hr',
|
||||
'hu', 'id', 'is', 'it', 'ku', 'la', 'lt', 'lv', 'mi', 'ms', 'mt', 'nl',
|
||||
'no', 'oc', 'pi', 'pl', 'pt', 'ro', 'rs_latin', 'sk', 'sl', 'sq', 'sv',
|
||||
'sw', 'tl', 'tr', 'uz', 'vi', 'latin'
|
||||
]
|
||||
arabic_lang = ['ar', 'fa', 'ug', 'ur']
|
||||
cyrillic_lang = [
|
||||
'ru', 'rs_cyrillic', 'be', 'bg', 'uk', 'mn', 'abq', 'ady', 'kbd', 'ava',
|
||||
'dar', 'inh', 'che', 'lbe', 'lez', 'tab', 'cyrillic'
|
||||
]
|
||||
devanagari_lang = [
|
||||
'hi', 'mr', 'ne', 'bh', 'mai', 'ang', 'bho', 'mah', 'sck', 'new', 'gom',
|
||||
'sa', 'bgc', 'devanagari'
|
||||
]
|
||||
multi_lang = latin_lang + arabic_lang + cyrillic_lang + devanagari_lang
|
||||
|
||||
assert (os.path.isfile("./rec_multi_language_lite_train.yml")
|
||||
), "Loss basic configuration file rec_multi_language_lite_train.yml.\
|
||||
You can download it from \
|
||||
https://github.com/PaddlePaddle/PaddleOCR/tree/dygraph/configs/rec/multi_language/"
|
||||
|
||||
global_config = yaml.load(open("./rec_multi_language_lite_train.yml", 'rb'), Loader=yaml.Loader)
|
||||
|
||||
global_config = yaml.load(
|
||||
open("./rec_multi_language_lite_train.yml", 'rb'), Loader=yaml.Loader)
|
||||
project_path = os.path.abspath(os.path.join(os.getcwd(), "../../../"))
|
||||
|
||||
|
||||
class ArgsParser(ArgumentParser):
|
||||
def __init__(self):
|
||||
super(ArgsParser, self).__init__(
|
||||
|
@ -41,15 +76,30 @@ class ArgsParser(ArgumentParser):
|
|||
self.add_argument(
|
||||
"-o", "--opt", nargs='+', help="set configuration options")
|
||||
self.add_argument(
|
||||
"-l", "--language", nargs='+', help="set language type, support {}".format(support_list))
|
||||
"-l",
|
||||
"--language",
|
||||
nargs='+',
|
||||
help="set language type, support {}".format(support_list))
|
||||
self.add_argument(
|
||||
"--train",type=str,help="you can use this command to change the train dataset default path")
|
||||
"--train",
|
||||
type=str,
|
||||
help="you can use this command to change the train dataset default path"
|
||||
)
|
||||
self.add_argument(
|
||||
"--val",type=str,help="you can use this command to change the eval dataset default path")
|
||||
"--val",
|
||||
type=str,
|
||||
help="you can use this command to change the eval dataset default path"
|
||||
)
|
||||
self.add_argument(
|
||||
"--dict",type=str,help="you can use this command to change the dictionary default path")
|
||||
"--dict",
|
||||
type=str,
|
||||
help="you can use this command to change the dictionary default path"
|
||||
)
|
||||
self.add_argument(
|
||||
"--data_dir",type=str,help="you can use this command to change the dataset default root path")
|
||||
"--data_dir",
|
||||
type=str,
|
||||
help="you can use this command to change the dataset default root path"
|
||||
)
|
||||
|
||||
def parse_args(self, argv=None):
|
||||
args = super(ArgsParser, self).parse_args(argv)
|
||||
|
@ -68,21 +118,37 @@ class ArgsParser(ArgumentParser):
|
|||
return config
|
||||
|
||||
def _set_language(self, type):
|
||||
assert(type),"please use -l or --language to choose language type"
|
||||
lang = type[0]
|
||||
assert (type), "please use -l or --language to choose language type"
|
||||
assert(
|
||||
type[0] in support_list.keys()
|
||||
lang in support_list.keys() or lang in multi_lang
|
||||
),"the sub_keys(-l or --language) can only be one of support list: \n{},\nbut get: {}, " \
|
||||
"please check your running command".format(support_list, type)
|
||||
global_config['Global']['character_dict_path'] = 'ppocr/utils/dict/{}_dict.txt'.format(type[0])
|
||||
global_config['Global']['save_model_dir'] = './output/rec_{}_lite'.format(type[0])
|
||||
global_config['Train']['dataset']['label_file_list'] = ["train_data/{}_train.txt".format(type[0])]
|
||||
global_config['Eval']['dataset']['label_file_list'] = ["train_data/{}_val.txt".format(type[0])]
|
||||
global_config['Global']['character_type'] = type[0]
|
||||
assert(
|
||||
os.path.isfile(os.path.join(project_path,global_config['Global']['character_dict_path']))
|
||||
),"Loss default dictionary file {}_dict.txt.You can download it from \
|
||||
https://github.com/PaddlePaddle/PaddleOCR/tree/dygraph/ppocr/utils/dict/".format(type[0])
|
||||
return type[0]
|
||||
"please check your running command".format(multi_lang, type)
|
||||
if lang in latin_lang:
|
||||
lang = "latin"
|
||||
elif lang in arabic_lang:
|
||||
lang = "arabic"
|
||||
elif lang in cyrillic_lang:
|
||||
lang = "cyrillic"
|
||||
elif lang in devanagari_lang:
|
||||
lang = "devanagari"
|
||||
global_config['Global'][
|
||||
'character_dict_path'] = 'ppocr/utils/dict/{}_dict.txt'.format(lang)
|
||||
global_config['Global'][
|
||||
'save_model_dir'] = './output/rec_{}_lite'.format(lang)
|
||||
global_config['Train']['dataset'][
|
||||
'label_file_list'] = ["train_data/{}_train.txt".format(lang)]
|
||||
global_config['Eval']['dataset'][
|
||||
'label_file_list'] = ["train_data/{}_val.txt".format(lang)]
|
||||
global_config['Global']['character_type'] = lang
|
||||
assert (
|
||||
os.path.isfile(
|
||||
os.path.join(project_path, global_config['Global'][
|
||||
'character_dict_path']))
|
||||
), "Loss default dictionary file {}_dict.txt.You can download it from \
|
||||
https://github.com/PaddlePaddle/PaddleOCR/tree/dygraph/ppocr/utils/dict/".format(
|
||||
lang)
|
||||
return lang
|
||||
|
||||
|
||||
def merge_config(config):
|
||||
|
@ -110,43 +176,51 @@ def merge_config(config):
|
|||
cur[sub_key] = value
|
||||
else:
|
||||
cur = cur[sub_key]
|
||||
|
||||
def loss_file(path):
|
||||
assert(
|
||||
os.path.exists(path)
|
||||
),"There is no such file:{},Please do not forget to put in the specified file".format(path)
|
||||
|
||||
|
||||
|
||||
def loss_file(path):
|
||||
assert (
|
||||
os.path.exists(path)
|
||||
), "There is no such file:{},Please do not forget to put in the specified file".format(
|
||||
path)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
FLAGS = ArgsParser().parse_args()
|
||||
merge_config(FLAGS.opt)
|
||||
save_file_path = 'rec_{}_lite_train.yml'.format(FLAGS.language)
|
||||
if os.path.isfile(save_file_path):
|
||||
os.remove(save_file_path)
|
||||
|
||||
|
||||
if FLAGS.train:
|
||||
global_config['Train']['dataset']['label_file_list'] = [FLAGS.train]
|
||||
train_label_path = os.path.join(project_path,FLAGS.train)
|
||||
train_label_path = os.path.join(project_path, FLAGS.train)
|
||||
loss_file(train_label_path)
|
||||
if FLAGS.val:
|
||||
global_config['Eval']['dataset']['label_file_list'] = [FLAGS.val]
|
||||
eval_label_path = os.path.join(project_path,FLAGS.val)
|
||||
eval_label_path = os.path.join(project_path, FLAGS.val)
|
||||
loss_file(eval_label_path)
|
||||
if FLAGS.dict:
|
||||
global_config['Global']['character_dict_path'] = FLAGS.dict
|
||||
dict_path = os.path.join(project_path,FLAGS.dict)
|
||||
dict_path = os.path.join(project_path, FLAGS.dict)
|
||||
loss_file(dict_path)
|
||||
if FLAGS.data_dir:
|
||||
global_config['Eval']['dataset']['data_dir'] = FLAGS.data_dir
|
||||
global_config['Train']['dataset']['data_dir'] = FLAGS.data_dir
|
||||
data_dir = os.path.join(project_path,FLAGS.data_dir)
|
||||
data_dir = os.path.join(project_path, FLAGS.data_dir)
|
||||
loss_file(data_dir)
|
||||
|
||||
|
||||
with open(save_file_path, 'w') as f:
|
||||
yaml.dump(dict(global_config), f, default_flow_style=False, sort_keys=False)
|
||||
yaml.dump(
|
||||
dict(global_config), f, default_flow_style=False, sort_keys=False)
|
||||
logging.info("Project path is :{}".format(project_path))
|
||||
logging.info("Train list path set to :{}".format(global_config['Train']['dataset']['label_file_list'][0]))
|
||||
logging.info("Eval list path set to :{}".format(global_config['Eval']['dataset']['label_file_list'][0]))
|
||||
logging.info("Dataset root path set to :{}".format(global_config['Eval']['dataset']['data_dir']))
|
||||
logging.info("Dict path set to :{}".format(global_config['Global']['character_dict_path']))
|
||||
logging.info("Config file set to :configs/rec/multi_language/{}".format(save_file_path))
|
||||
logging.info("Train list path set to :{}".format(global_config['Train'][
|
||||
'dataset']['label_file_list'][0]))
|
||||
logging.info("Eval list path set to :{}".format(global_config['Eval'][
|
||||
'dataset']['label_file_list'][0]))
|
||||
logging.info("Dataset root path set to :{}".format(global_config['Eval'][
|
||||
'dataset']['data_dir']))
|
||||
logging.info("Dict path set to :{}".format(global_config['Global'][
|
||||
'character_dict_path']))
|
||||
logging.info("Config file set to :configs/rec/multi_language/{}".
|
||||
format(save_file_path))
|
||||
|
|
|
@ -0,0 +1,111 @@
|
|||
Global:
|
||||
use_gpu: true
|
||||
epoch_num: 500
|
||||
log_smooth_window: 20
|
||||
print_batch_step: 10
|
||||
save_model_dir: ./output/rec_arabic_lite
|
||||
save_epoch_step: 3
|
||||
eval_batch_step:
|
||||
- 0
|
||||
- 2000
|
||||
cal_metric_during_train: true
|
||||
pretrained_model: null
|
||||
checkpoints: null
|
||||
save_inference_dir: null
|
||||
use_visualdl: false
|
||||
infer_img: null
|
||||
character_dict_path: ppocr/utils/dict/arabic_dict.txt
|
||||
character_type: arabic
|
||||
max_text_length: 25
|
||||
infer_mode: false
|
||||
use_space_char: true
|
||||
Optimizer:
|
||||
name: Adam
|
||||
beta1: 0.9
|
||||
beta2: 0.999
|
||||
lr:
|
||||
name: Cosine
|
||||
learning_rate: 0.001
|
||||
regularizer:
|
||||
name: L2
|
||||
factor: 1.0e-05
|
||||
Architecture:
|
||||
model_type: rec
|
||||
algorithm: CRNN
|
||||
Transform: null
|
||||
Backbone:
|
||||
name: MobileNetV3
|
||||
scale: 0.5
|
||||
model_name: small
|
||||
small_stride:
|
||||
- 1
|
||||
- 2
|
||||
- 2
|
||||
- 2
|
||||
Neck:
|
||||
name: SequenceEncoder
|
||||
encoder_type: rnn
|
||||
hidden_size: 48
|
||||
Head:
|
||||
name: CTCHead
|
||||
fc_decay: 1.0e-05
|
||||
Loss:
|
||||
name: CTCLoss
|
||||
PostProcess:
|
||||
name: CTCLabelDecode
|
||||
Metric:
|
||||
name: RecMetric
|
||||
main_indicator: acc
|
||||
Train:
|
||||
dataset:
|
||||
name: SimpleDataSet
|
||||
data_dir: train_data/
|
||||
label_file_list:
|
||||
- train_data/arabic_train.txt
|
||||
transforms:
|
||||
- DecodeImage:
|
||||
img_mode: BGR
|
||||
channel_first: false
|
||||
- RecAug: null
|
||||
- CTCLabelEncode: null
|
||||
- RecResizeImg:
|
||||
image_shape:
|
||||
- 3
|
||||
- 32
|
||||
- 320
|
||||
- KeepKeys:
|
||||
keep_keys:
|
||||
- image
|
||||
- label
|
||||
- length
|
||||
loader:
|
||||
shuffle: true
|
||||
batch_size_per_card: 256
|
||||
drop_last: true
|
||||
num_workers: 8
|
||||
Eval:
|
||||
dataset:
|
||||
name: SimpleDataSet
|
||||
data_dir: train_data/
|
||||
label_file_list:
|
||||
- train_data/arabic_val.txt
|
||||
transforms:
|
||||
- DecodeImage:
|
||||
img_mode: BGR
|
||||
channel_first: false
|
||||
- CTCLabelEncode: null
|
||||
- RecResizeImg:
|
||||
image_shape:
|
||||
- 3
|
||||
- 32
|
||||
- 320
|
||||
- KeepKeys:
|
||||
keep_keys:
|
||||
- image
|
||||
- label
|
||||
- length
|
||||
loader:
|
||||
shuffle: false
|
||||
drop_last: false
|
||||
batch_size_per_card: 256
|
||||
num_workers: 8
|
|
@ -0,0 +1,111 @@
|
|||
Global:
|
||||
use_gpu: true
|
||||
epoch_num: 500
|
||||
log_smooth_window: 20
|
||||
print_batch_step: 10
|
||||
save_model_dir: ./output/rec_cyrillic_lite
|
||||
save_epoch_step: 3
|
||||
eval_batch_step:
|
||||
- 0
|
||||
- 2000
|
||||
cal_metric_during_train: true
|
||||
pretrained_model: null
|
||||
checkpoints: null
|
||||
save_inference_dir: null
|
||||
use_visualdl: false
|
||||
infer_img: null
|
||||
character_dict_path: ppocr/utils/dict/cyrillic_dict.txt
|
||||
character_type: cyrillic
|
||||
max_text_length: 25
|
||||
infer_mode: false
|
||||
use_space_char: true
|
||||
Optimizer:
|
||||
name: Adam
|
||||
beta1: 0.9
|
||||
beta2: 0.999
|
||||
lr:
|
||||
name: Cosine
|
||||
learning_rate: 0.001
|
||||
regularizer:
|
||||
name: L2
|
||||
factor: 1.0e-05
|
||||
Architecture:
|
||||
model_type: rec
|
||||
algorithm: CRNN
|
||||
Transform: null
|
||||
Backbone:
|
||||
name: MobileNetV3
|
||||
scale: 0.5
|
||||
model_name: small
|
||||
small_stride:
|
||||
- 1
|
||||
- 2
|
||||
- 2
|
||||
- 2
|
||||
Neck:
|
||||
name: SequenceEncoder
|
||||
encoder_type: rnn
|
||||
hidden_size: 48
|
||||
Head:
|
||||
name: CTCHead
|
||||
fc_decay: 1.0e-05
|
||||
Loss:
|
||||
name: CTCLoss
|
||||
PostProcess:
|
||||
name: CTCLabelDecode
|
||||
Metric:
|
||||
name: RecMetric
|
||||
main_indicator: acc
|
||||
Train:
|
||||
dataset:
|
||||
name: SimpleDataSet
|
||||
data_dir: train_data/
|
||||
label_file_list:
|
||||
- train_data/cyrillic_train.txt
|
||||
transforms:
|
||||
- DecodeImage:
|
||||
img_mode: BGR
|
||||
channel_first: false
|
||||
- RecAug: null
|
||||
- CTCLabelEncode: null
|
||||
- RecResizeImg:
|
||||
image_shape:
|
||||
- 3
|
||||
- 32
|
||||
- 320
|
||||
- KeepKeys:
|
||||
keep_keys:
|
||||
- image
|
||||
- label
|
||||
- length
|
||||
loader:
|
||||
shuffle: true
|
||||
batch_size_per_card: 256
|
||||
drop_last: true
|
||||
num_workers: 8
|
||||
Eval:
|
||||
dataset:
|
||||
name: SimpleDataSet
|
||||
data_dir: train_data/
|
||||
label_file_list:
|
||||
- train_data/cyrillic_val.txt
|
||||
transforms:
|
||||
- DecodeImage:
|
||||
img_mode: BGR
|
||||
channel_first: false
|
||||
- CTCLabelEncode: null
|
||||
- RecResizeImg:
|
||||
image_shape:
|
||||
- 3
|
||||
- 32
|
||||
- 320
|
||||
- KeepKeys:
|
||||
keep_keys:
|
||||
- image
|
||||
- label
|
||||
- length
|
||||
loader:
|
||||
shuffle: false
|
||||
drop_last: false
|
||||
batch_size_per_card: 256
|
||||
num_workers: 8
|
|
@ -0,0 +1,111 @@
|
|||
Global:
|
||||
use_gpu: true
|
||||
epoch_num: 500
|
||||
log_smooth_window: 20
|
||||
print_batch_step: 10
|
||||
save_model_dir: ./output/rec_devanagari_lite
|
||||
save_epoch_step: 3
|
||||
eval_batch_step:
|
||||
- 0
|
||||
- 2000
|
||||
cal_metric_during_train: true
|
||||
pretrained_model: null
|
||||
checkpoints: null
|
||||
save_inference_dir: null
|
||||
use_visualdl: false
|
||||
infer_img: null
|
||||
character_dict_path: ppocr/utils/dict/devanagari_dict.txt
|
||||
character_type: devanagari
|
||||
max_text_length: 25
|
||||
infer_mode: false
|
||||
use_space_char: true
|
||||
Optimizer:
|
||||
name: Adam
|
||||
beta1: 0.9
|
||||
beta2: 0.999
|
||||
lr:
|
||||
name: Cosine
|
||||
learning_rate: 0.001
|
||||
regularizer:
|
||||
name: L2
|
||||
factor: 1.0e-05
|
||||
Architecture:
|
||||
model_type: rec
|
||||
algorithm: CRNN
|
||||
Transform: null
|
||||
Backbone:
|
||||
name: MobileNetV3
|
||||
scale: 0.5
|
||||
model_name: small
|
||||
small_stride:
|
||||
- 1
|
||||
- 2
|
||||
- 2
|
||||
- 2
|
||||
Neck:
|
||||
name: SequenceEncoder
|
||||
encoder_type: rnn
|
||||
hidden_size: 48
|
||||
Head:
|
||||
name: CTCHead
|
||||
fc_decay: 1.0e-05
|
||||
Loss:
|
||||
name: CTCLoss
|
||||
PostProcess:
|
||||
name: CTCLabelDecode
|
||||
Metric:
|
||||
name: RecMetric
|
||||
main_indicator: acc
|
||||
Train:
|
||||
dataset:
|
||||
name: SimpleDataSet
|
||||
data_dir: train_data/
|
||||
label_file_list:
|
||||
- train_data/devanagari_train.txt
|
||||
transforms:
|
||||
- DecodeImage:
|
||||
img_mode: BGR
|
||||
channel_first: false
|
||||
- RecAug: null
|
||||
- CTCLabelEncode: null
|
||||
- RecResizeImg:
|
||||
image_shape:
|
||||
- 3
|
||||
- 32
|
||||
- 320
|
||||
- KeepKeys:
|
||||
keep_keys:
|
||||
- image
|
||||
- label
|
||||
- length
|
||||
loader:
|
||||
shuffle: true
|
||||
batch_size_per_card: 256
|
||||
drop_last: true
|
||||
num_workers: 8
|
||||
Eval:
|
||||
dataset:
|
||||
name: SimpleDataSet
|
||||
data_dir: train_data/
|
||||
label_file_list:
|
||||
- train_data/devanagari_val.txt
|
||||
transforms:
|
||||
- DecodeImage:
|
||||
img_mode: BGR
|
||||
channel_first: false
|
||||
- CTCLabelEncode: null
|
||||
- RecResizeImg:
|
||||
image_shape:
|
||||
- 3
|
||||
- 32
|
||||
- 320
|
||||
- KeepKeys:
|
||||
keep_keys:
|
||||
- image
|
||||
- label
|
||||
- length
|
||||
loader:
|
||||
shuffle: false
|
||||
drop_last: false
|
||||
batch_size_per_card: 256
|
||||
num_workers: 8
|
|
@ -15,11 +15,11 @@ Global:
|
|||
use_visualdl: False
|
||||
infer_img:
|
||||
# for data or label process
|
||||
character_dict_path: ppocr/utils/dict/en_dict.txt
|
||||
character_dict_path: ppocr/utils/en_dict.txt
|
||||
character_type: EN
|
||||
max_text_length: 25
|
||||
infer_mode: False
|
||||
use_space_char: False
|
||||
use_space_char: True
|
||||
|
||||
|
||||
Optimizer:
|
||||
|
|
|
@ -0,0 +1,111 @@
|
|||
Global:
|
||||
use_gpu: true
|
||||
epoch_num: 500
|
||||
log_smooth_window: 20
|
||||
print_batch_step: 10
|
||||
save_model_dir: ./output/rec_latin_lite
|
||||
save_epoch_step: 3
|
||||
eval_batch_step:
|
||||
- 0
|
||||
- 2000
|
||||
cal_metric_during_train: true
|
||||
pretrained_model: null
|
||||
checkpoints: null
|
||||
save_inference_dir: null
|
||||
use_visualdl: false
|
||||
infer_img: null
|
||||
character_dict_path: ppocr/utils/dict/latin_dict.txt
|
||||
character_type: latin
|
||||
max_text_length: 25
|
||||
infer_mode: false
|
||||
use_space_char: true
|
||||
Optimizer:
|
||||
name: Adam
|
||||
beta1: 0.9
|
||||
beta2: 0.999
|
||||
lr:
|
||||
name: Cosine
|
||||
learning_rate: 0.001
|
||||
regularizer:
|
||||
name: L2
|
||||
factor: 1.0e-05
|
||||
Architecture:
|
||||
model_type: rec
|
||||
algorithm: CRNN
|
||||
Transform: null
|
||||
Backbone:
|
||||
name: MobileNetV3
|
||||
scale: 0.5
|
||||
model_name: small
|
||||
small_stride:
|
||||
- 1
|
||||
- 2
|
||||
- 2
|
||||
- 2
|
||||
Neck:
|
||||
name: SequenceEncoder
|
||||
encoder_type: rnn
|
||||
hidden_size: 48
|
||||
Head:
|
||||
name: CTCHead
|
||||
fc_decay: 1.0e-05
|
||||
Loss:
|
||||
name: CTCLoss
|
||||
PostProcess:
|
||||
name: CTCLabelDecode
|
||||
Metric:
|
||||
name: RecMetric
|
||||
main_indicator: acc
|
||||
Train:
|
||||
dataset:
|
||||
name: SimpleDataSet
|
||||
data_dir: train_data/
|
||||
label_file_list:
|
||||
- train_data/latin_train.txt
|
||||
transforms:
|
||||
- DecodeImage:
|
||||
img_mode: BGR
|
||||
channel_first: false
|
||||
- RecAug: null
|
||||
- CTCLabelEncode: null
|
||||
- RecResizeImg:
|
||||
image_shape:
|
||||
- 3
|
||||
- 32
|
||||
- 320
|
||||
- KeepKeys:
|
||||
keep_keys:
|
||||
- image
|
||||
- label
|
||||
- length
|
||||
loader:
|
||||
shuffle: true
|
||||
batch_size_per_card: 256
|
||||
drop_last: true
|
||||
num_workers: 8
|
||||
Eval:
|
||||
dataset:
|
||||
name: SimpleDataSet
|
||||
data_dir: train_data/
|
||||
label_file_list:
|
||||
- train_data/latin_val.txt
|
||||
transforms:
|
||||
- DecodeImage:
|
||||
img_mode: BGR
|
||||
channel_first: false
|
||||
- CTCLabelEncode: null
|
||||
- RecResizeImg:
|
||||
image_shape:
|
||||
- 3
|
||||
- 32
|
||||
- 320
|
||||
- KeepKeys:
|
||||
keep_keys:
|
||||
- image
|
||||
- label
|
||||
- length
|
||||
loader:
|
||||
shuffle: false
|
||||
drop_last: false
|
||||
batch_size_per_card: 256
|
||||
num_workers: 8
|
|
@ -10,7 +10,7 @@ Global:
|
|||
cal_metric_during_train: True
|
||||
pretrained_model:
|
||||
checkpoints:
|
||||
save_inference_dir:
|
||||
save_inference_dir: ./
|
||||
use_visualdl: False
|
||||
infer_img: doc/imgs_words_en/word_10.png
|
||||
# for data or label process
|
||||
|
@ -19,6 +19,7 @@ Global:
|
|||
max_text_length: 25
|
||||
infer_mode: False
|
||||
use_space_char: False
|
||||
save_res_path: ./output/rec/predicts_ic15.txt
|
||||
|
||||
Optimizer:
|
||||
name: Adam
|
||||
|
@ -59,8 +60,8 @@ Metric:
|
|||
Train:
|
||||
dataset:
|
||||
name: SimpleDataSet
|
||||
data_dir: ./train_data/
|
||||
label_file_list: ["./train_data/train_list.txt"]
|
||||
data_dir: ./train_data/ic15_data/
|
||||
label_file_list: ["./train_data/ic15_data/rec_gt_train.txt"]
|
||||
transforms:
|
||||
- DecodeImage: # load image
|
||||
img_mode: BGR
|
||||
|
@ -80,8 +81,8 @@ Train:
|
|||
Eval:
|
||||
dataset:
|
||||
name: SimpleDataSet
|
||||
data_dir: ./train_data/
|
||||
label_file_list: ["./train_data/train_list.txt"]
|
||||
data_dir: ./train_data/ic15_data
|
||||
label_file_list: ["./train_data/ic15_data/rec_gt_test.txt"]
|
||||
transforms:
|
||||
- DecodeImage: # load image
|
||||
img_mode: BGR
|
||||
|
|
|
@ -0,0 +1,102 @@
|
|||
Global:
|
||||
use_gpu: True
|
||||
epoch_num: 21
|
||||
log_smooth_window: 20
|
||||
print_batch_step: 10
|
||||
save_model_dir: ./output/rec/nrtr/
|
||||
save_epoch_step: 1
|
||||
# evaluation is run every 2000 iterations
|
||||
eval_batch_step: [0, 2000]
|
||||
cal_metric_during_train: True
|
||||
pretrained_model:
|
||||
checkpoints:
|
||||
save_inference_dir:
|
||||
use_visualdl: False
|
||||
infer_img: doc/imgs_words_en/word_10.png
|
||||
# for data or label process
|
||||
character_dict_path:
|
||||
character_type: EN_symbol
|
||||
max_text_length: 25
|
||||
infer_mode: False
|
||||
use_space_char: True
|
||||
save_res_path: ./output/rec/predicts_nrtr.txt
|
||||
|
||||
Optimizer:
|
||||
name: Adam
|
||||
beta1: 0.9
|
||||
beta2: 0.99
|
||||
clip_norm: 5.0
|
||||
lr:
|
||||
name: Cosine
|
||||
learning_rate: 0.0005
|
||||
warmup_epoch: 2
|
||||
regularizer:
|
||||
name: 'L2'
|
||||
factor: 0.
|
||||
|
||||
Architecture:
|
||||
model_type: rec
|
||||
algorithm: NRTR
|
||||
in_channels: 1
|
||||
Transform:
|
||||
Backbone:
|
||||
name: MTB
|
||||
cnn_num: 2
|
||||
Head:
|
||||
name: Transformer
|
||||
d_model: 512
|
||||
num_encoder_layers: 6
|
||||
beam_size: 10 # When beam_size is greater than 0, beam search is used during evaluation.
|
||||
|
||||
|
||||
Loss:
|
||||
name: NRTRLoss
|
||||
smoothing: True
|
||||
|
||||
PostProcess:
|
||||
name: NRTRLabelDecode
|
||||
|
||||
Metric:
|
||||
name: RecMetric
|
||||
main_indicator: acc
|
||||
|
||||
Train:
|
||||
dataset:
|
||||
name: LMDBDataSet
|
||||
data_dir: ./train_data/data_lmdb_release/training/
|
||||
transforms:
|
||||
- NRTRDecodeImage: # load image
|
||||
img_mode: BGR
|
||||
channel_first: False
|
||||
- NRTRLabelEncode: # Class handling label
|
||||
- NRTRRecResizeImg:
|
||||
image_shape: [100, 32]
|
||||
resize_type: PIL # PIL or OpenCV
|
||||
- KeepKeys:
|
||||
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
||||
loader:
|
||||
shuffle: True
|
||||
batch_size_per_card: 512
|
||||
drop_last: True
|
||||
num_workers: 8
|
||||
|
||||
Eval:
|
||||
dataset:
|
||||
name: LMDBDataSet
|
||||
data_dir: ./train_data/data_lmdb_release/evaluation/
|
||||
transforms:
|
||||
- NRTRDecodeImage: # load image
|
||||
img_mode: BGR
|
||||
channel_first: False
|
||||
- NRTRLabelEncode: # Class handling label
|
||||
- NRTRRecResizeImg:
|
||||
image_shape: [100, 32]
|
||||
resize_type: PIL # PIL or OpenCV
|
||||
- KeepKeys:
|
||||
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
||||
loader:
|
||||
shuffle: False
|
||||
drop_last: False
|
||||
batch_size_per_card: 256
|
||||
num_workers: 1
|
||||
use_shared_memory: False
|
|
@ -19,6 +19,7 @@ Global:
|
|||
max_text_length: 25
|
||||
infer_mode: False
|
||||
use_space_char: False
|
||||
save_res_path: ./output/rec/predicts_mv3_none_bilstm_ctc.txt
|
||||
|
||||
Optimizer:
|
||||
name: Adam
|
||||
|
|
|
@ -19,6 +19,7 @@ Global:
|
|||
max_text_length: 25
|
||||
infer_mode: False
|
||||
use_space_char: False
|
||||
save_res_path: ./output/rec/predicts_mv3_none_none_ctc.txt
|
||||
|
||||
Optimizer:
|
||||
name: Adam
|
||||
|
|
|
@ -19,6 +19,7 @@ Global:
|
|||
max_text_length: 25
|
||||
infer_mode: False
|
||||
use_space_char: False
|
||||
save_res_path: ./output/rec/predicts_mv3_tps_bilstm_att.txt
|
||||
|
||||
|
||||
Optimizer:
|
||||
|
|
|
@ -19,6 +19,7 @@ Global:
|
|||
max_text_length: 25
|
||||
infer_mode: False
|
||||
use_space_char: False
|
||||
save_res_path: ./output/rec/predicts_mv3_tps_bilstm_ctc.txt
|
||||
|
||||
Optimizer:
|
||||
name: Adam
|
||||
|
|
|
@ -19,6 +19,7 @@ Global:
|
|||
max_text_length: 25
|
||||
infer_mode: False
|
||||
use_space_char: False
|
||||
save_res_path: ./output/rec/predicts_r34_vd_none_bilstm_ctc.txt
|
||||
|
||||
Optimizer:
|
||||
name: Adam
|
||||
|
|
|
@ -19,6 +19,7 @@ Global:
|
|||
max_text_length: 25
|
||||
infer_mode: False
|
||||
use_space_char: False
|
||||
save_res_path: ./output/rec/predicts_r34_vd_none_none_ctc.txt
|
||||
|
||||
Optimizer:
|
||||
name: Adam
|
||||
|
|
|
@ -19,6 +19,7 @@ Global:
|
|||
max_text_length: 25
|
||||
infer_mode: False
|
||||
use_space_char: False
|
||||
save_res_path: ./output/rec/predicts_b3_rare_r34_none_gru.txt
|
||||
|
||||
|
||||
Optimizer:
|
||||
|
|
|
@ -19,6 +19,7 @@ Global:
|
|||
max_text_length: 25
|
||||
infer_mode: False
|
||||
use_space_char: False
|
||||
save_res_path: ./output/rec/predicts_r34_vd_tps_bilstm_ctc.txt
|
||||
|
||||
Optimizer:
|
||||
name: Adam
|
||||
|
@ -37,7 +38,7 @@ Architecture:
|
|||
name: TPS
|
||||
num_fiducial: 20
|
||||
loc_lr: 0.1
|
||||
model_name: small
|
||||
model_name: large
|
||||
Backbone:
|
||||
name: ResNet
|
||||
layers: 34
|
||||
|
|
|
@ -20,6 +20,7 @@ Global:
|
|||
num_heads: 8
|
||||
infer_mode: False
|
||||
use_space_char: False
|
||||
save_res_path: ./output/rec/predicts_srn.txt
|
||||
|
||||
|
||||
Optimizer:
|
||||
|
|
|
@ -0,0 +1,116 @@
|
|||
Global:
|
||||
use_gpu: true
|
||||
epoch_num: 50
|
||||
log_smooth_window: 20
|
||||
print_batch_step: 5
|
||||
save_model_dir: ./output/table_mv3/
|
||||
save_epoch_step: 5
|
||||
# evaluation is run every 400 iterations after the 0th iteration
|
||||
eval_batch_step: [0, 400]
|
||||
cal_metric_during_train: True
|
||||
pretrained_model:
|
||||
checkpoints:
|
||||
save_inference_dir:
|
||||
use_visualdl: False
|
||||
infer_img: doc/imgs_words/ch/word_1.jpg
|
||||
# for data or label process
|
||||
character_dict_path: ppocr/utils/dict/table_structure_dict.txt
|
||||
character_type: en
|
||||
max_text_length: 100
|
||||
max_elem_length: 500
|
||||
max_cell_num: 500
|
||||
infer_mode: False
|
||||
process_total_num: 0
|
||||
process_cut_num: 0
|
||||
|
||||
|
||||
Optimizer:
|
||||
name: Adam
|
||||
beta1: 0.9
|
||||
beta2: 0.999
|
||||
clip_norm: 5.0
|
||||
lr:
|
||||
learning_rate: 0.001
|
||||
regularizer:
|
||||
name: 'L2'
|
||||
factor: 0.00000
|
||||
|
||||
Architecture:
|
||||
model_type: table
|
||||
algorithm: TableAttn
|
||||
Backbone:
|
||||
name: MobileNetV3
|
||||
scale: 1.0
|
||||
model_name: small
|
||||
disable_se: True
|
||||
Head:
|
||||
name: TableAttentionHead
|
||||
hidden_size: 256
|
||||
l2_decay: 0.00001
|
||||
loc_type: 2
|
||||
|
||||
Loss:
|
||||
name: TableAttentionLoss
|
||||
structure_weight: 100.0
|
||||
loc_weight: 10000.0
|
||||
|
||||
PostProcess:
|
||||
name: TableLabelDecode
|
||||
|
||||
Metric:
|
||||
name: TableMetric
|
||||
main_indicator: acc
|
||||
|
||||
Train:
|
||||
dataset:
|
||||
name: PubTabDataSet
|
||||
data_dir: train_data/table/pubtabnet/train/
|
||||
label_file_path: train_data/table/pubtabnet/PubTabNet_2.0.0_train.jsonl
|
||||
transforms:
|
||||
- DecodeImage: # load image
|
||||
img_mode: BGR
|
||||
channel_first: False
|
||||
- ResizeTableImage:
|
||||
max_len: 488
|
||||
- TableLabelEncode:
|
||||
- NormalizeImage:
|
||||
scale: 1./255.
|
||||
mean: [0.485, 0.456, 0.406]
|
||||
std: [0.229, 0.224, 0.225]
|
||||
order: 'hwc'
|
||||
- PaddingTableImage:
|
||||
- ToCHWImage:
|
||||
- KeepKeys:
|
||||
keep_keys: ['image', 'structure', 'bbox_list', 'sp_tokens', 'bbox_list_mask']
|
||||
loader:
|
||||
shuffle: True
|
||||
batch_size_per_card: 32
|
||||
drop_last: True
|
||||
num_workers: 1
|
||||
|
||||
Eval:
|
||||
dataset:
|
||||
name: PubTabDataSet
|
||||
data_dir: train_data/table/pubtabnet/val/
|
||||
label_file_path: train_data/table/pubtabnet/PubTabNet_2.0.0_val.jsonl
|
||||
transforms:
|
||||
- DecodeImage: # load image
|
||||
img_mode: BGR
|
||||
channel_first: False
|
||||
- ResizeTableImage:
|
||||
max_len: 488
|
||||
- TableLabelEncode:
|
||||
- NormalizeImage:
|
||||
scale: 1./255.
|
||||
mean: [0.485, 0.456, 0.406]
|
||||
std: [0.229, 0.224, 0.225]
|
||||
order: 'hwc'
|
||||
- PaddingTableImage:
|
||||
- ToCHWImage:
|
||||
- KeepKeys:
|
||||
keep_keys: ['image', 'structure', 'bbox_list', 'sp_tokens', 'bbox_list_mask']
|
||||
loader:
|
||||
shuffle: False
|
||||
drop_last: False
|
||||
batch_size_per_card: 16
|
||||
num_workers: 1
|
|
@ -0,0 +1,9 @@
|
|||
*.iml
|
||||
.gradle
|
||||
/local.properties
|
||||
/.idea/*
|
||||
.DS_Store
|
||||
/build
|
||||
/captures
|
||||
.externalNativeBuild
|
||||
|
|
@ -0,0 +1,19 @@
|
|||
# 如何快速测试
|
||||
### 1. 安装最新版本的Android Studio
|
||||
可以从 https://developer.android.com/studio 下载。本Demo使用4.0版本的Android Studio编写。
|
||||
|
||||
### 2. 安装NDK 20 以上版本
|
||||
Demo测试的时候使用的是NDK 20b版本,20版本以上均可以支持编译成功。
|
||||
|
||||
如果您是初学者,可以用以下方式安装和测试NDK编译环境。
|
||||
点击 File -> New ->New Project, 新建 "Native C++" project
|
||||
|
||||
### 3. 导入项目
|
||||
点击 File->New->Import Project..., 然后跟着Android Studio的引导导入
|
||||
|
||||
|
||||
# 获得更多支持
|
||||
前往[端计算模型生成平台EasyEdge](https://ai.baidu.com/easyedge/app/open_source_demo?referrerUrl=paddlelite),获得更多开发支持:
|
||||
|
||||
- Demo APP:可使用手机扫码安装,方便手机端快速体验文字识别
|
||||
- SDK:模型被封装为适配不同芯片硬件和操作系统SDK,包括完善的接口,方便进行二次开发
|
|
@ -0,0 +1 @@
|
|||
/build
|
|
@ -0,0 +1,98 @@
|
|||
import java.security.MessageDigest
|
||||
|
||||
apply plugin: 'com.android.application'
|
||||
|
||||
android {
|
||||
compileSdkVersion 29
|
||||
defaultConfig {
|
||||
applicationId "com.baidu.paddle.lite.demo.ocr"
|
||||
minSdkVersion 23
|
||||
targetSdkVersion 29
|
||||
versionCode 1
|
||||
versionName "1.0"
|
||||
testInstrumentationRunner "android.support.test.runner.AndroidJUnitRunner"
|
||||
externalNativeBuild {
|
||||
cmake {
|
||||
cppFlags "-std=c++11 -frtti -fexceptions -Wno-format"
|
||||
arguments '-DANDROID_PLATFORM=android-23', '-DANDROID_STL=c++_shared' ,"-DANDROID_ARM_NEON=TRUE"
|
||||
}
|
||||
}
|
||||
ndk {
|
||||
// abiFilters "arm64-v8a", "armeabi-v7a"
|
||||
abiFilters "arm64-v8a", "armeabi-v7a"
|
||||
ldLibs "jnigraphics"
|
||||
}
|
||||
}
|
||||
buildTypes {
|
||||
release {
|
||||
minifyEnabled false
|
||||
proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro'
|
||||
}
|
||||
}
|
||||
externalNativeBuild {
|
||||
cmake {
|
||||
path "src/main/cpp/CMakeLists.txt"
|
||||
version "3.10.2"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
dependencies {
|
||||
implementation fileTree(include: ['*.jar'], dir: 'libs')
|
||||
implementation 'androidx.appcompat:appcompat:1.1.0'
|
||||
implementation 'androidx.constraintlayout:constraintlayout:1.1.3'
|
||||
testImplementation 'junit:junit:4.12'
|
||||
androidTestImplementation 'com.android.support.test:runner:1.0.2'
|
||||
androidTestImplementation 'com.android.support.test.espresso:espresso-core:3.0.2'
|
||||
}
|
||||
|
||||
def archives = [
|
||||
[
|
||||
'src' : 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/paddle_lite_libs_v2_9_0.tar.gz',
|
||||
'dest': 'PaddleLite'
|
||||
],
|
||||
[
|
||||
'src' : 'https://paddlelite-demo.bj.bcebos.com/libs/android/opencv-4.2.0-android-sdk.tar.gz',
|
||||
'dest': 'OpenCV'
|
||||
],
|
||||
[
|
||||
'src' : 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ocr_v2_for_cpu.tar.gz',
|
||||
'dest' : 'src/main/assets/models'
|
||||
],
|
||||
[
|
||||
'src' : 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_dict.tar.gz',
|
||||
'dest' : 'src/main/assets/labels'
|
||||
]
|
||||
]
|
||||
|
||||
task downloadAndExtractArchives(type: DefaultTask) {
|
||||
doFirst {
|
||||
println "Downloading and extracting archives including libs and models"
|
||||
}
|
||||
doLast {
|
||||
// Prepare cache folder for archives
|
||||
String cachePath = "cache"
|
||||
if (!file("${cachePath}").exists()) {
|
||||
mkdir "${cachePath}"
|
||||
}
|
||||
archives.eachWithIndex { archive, index ->
|
||||
MessageDigest messageDigest = MessageDigest.getInstance('MD5')
|
||||
messageDigest.update(archive.src.bytes)
|
||||
String cacheName = new BigInteger(1, messageDigest.digest()).toString(32)
|
||||
// Download the target archive if not exists
|
||||
boolean copyFiles = !file("${archive.dest}").exists()
|
||||
if (!file("${cachePath}/${cacheName}.tar.gz").exists()) {
|
||||
ant.get(src: archive.src, dest: file("${cachePath}/${cacheName}.tar.gz"))
|
||||
copyFiles = true; // force to copy files from the latest archive files
|
||||
}
|
||||
// Extract the target archive if its dest path does not exists
|
||||
if (copyFiles) {
|
||||
copy {
|
||||
from tarTree("${cachePath}/${cacheName}.tar.gz")
|
||||
into "${archive.dest}"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
preBuild.dependsOn downloadAndExtractArchives
|
|
@ -0,0 +1,21 @@
|
|||
# Add project specific ProGuard rules here.
|
||||
# You can control the set of applied configuration files using the
|
||||
# proguardFiles setting in build.gradle.
|
||||
#
|
||||
# For more details, see
|
||||
# http://developer.android.com/guide/developing/tools/proguard.html
|
||||
|
||||
# If your project uses WebView with JS, uncomment the following
|
||||
# and specify the fully qualified class name to the JavaScript interface
|
||||
# class:
|
||||
#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
|
||||
# public *;
|
||||
#}
|
||||
|
||||
# Uncomment this to preserve the line number information for
|
||||
# debugging stack traces.
|
||||
#-keepattributes SourceFile,LineNumberTable
|
||||
|
||||
# If you keep the line number information, uncomment this to
|
||||
# hide the original source file name.
|
||||
#-renamesourcefileattribute SourceFile
|
|
@ -0,0 +1,26 @@
|
|||
package com.baidu.paddle.lite.demo.ocr;
|
||||
|
||||
import android.content.Context;
|
||||
import android.support.test.InstrumentationRegistry;
|
||||
import android.support.test.runner.AndroidJUnit4;
|
||||
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
|
||||
import static org.junit.Assert.*;
|
||||
|
||||
/**
|
||||
* Instrumented test, which will execute on an Android device.
|
||||
*
|
||||
* @see <a href="http://d.android.com/tools/testing">Testing documentation</a>
|
||||
*/
|
||||
@RunWith(AndroidJUnit4.class)
|
||||
public class ExampleInstrumentedTest {
|
||||
@Test
|
||||
public void useAppContext() {
|
||||
// Context of the app under test.
|
||||
Context appContext = InstrumentationRegistry.getTargetContext();
|
||||
|
||||
assertEquals("com.baidu.paddle.lite.demo", appContext.getPackageName());
|
||||
}
|
||||
}
|
|
@ -0,0 +1,39 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
|
||||
package="com.baidu.paddle.lite.demo.ocr">
|
||||
|
||||
<uses-permission android:name="android.permission.WRITE_EXTERNAL_STORAGE"/>
|
||||
<uses-permission android:name="android.permission.READ_EXTERNAL_STORAGE"/>
|
||||
<uses-permission android:name="android.permission.CAMERA"/>
|
||||
|
||||
|
||||
<application
|
||||
android:allowBackup="true"
|
||||
android:icon="@mipmap/ic_launcher"
|
||||
android:label="@string/app_name"
|
||||
android:roundIcon="@mipmap/ic_launcher_round"
|
||||
android:supportsRtl="true"
|
||||
android:theme="@style/AppTheme">
|
||||
<!-- to test MiniActivity, change this to com.baidu.paddle.lite.demo.ocr.MiniActivity -->
|
||||
<activity android:name="com.baidu.paddle.lite.demo.ocr.MainActivity">
|
||||
<intent-filter>
|
||||
<action android:name="android.intent.action.MAIN"/>
|
||||
<category android:name="android.intent.category.LAUNCHER"/>
|
||||
</intent-filter>
|
||||
</activity>
|
||||
<activity
|
||||
android:name="com.baidu.paddle.lite.demo.ocr.SettingsActivity"
|
||||
android:label="Settings">
|
||||
</activity>
|
||||
<provider
|
||||
android:name="androidx.core.content.FileProvider"
|
||||
android:authorities="com.baidu.paddle.lite.demo.ocr.fileprovider"
|
||||
android:exported="false"
|
||||
android:grantUriPermissions="true">
|
||||
<meta-data
|
||||
android:name="android.support.FILE_PROVIDER_PATHS"
|
||||
android:resource="@xml/file_paths"></meta-data>
|
||||
</provider>
|
||||
</application>
|
||||
|
||||
</manifest>
|
After Width: | Height: | Size: 62 KiB |
After Width: | Height: | Size: 63 KiB |
After Width: | Height: | Size: 171 KiB |
After Width: | Height: | Size: 61 KiB |
|
@ -0,0 +1,117 @@
|
|||
# For more information about using CMake with Android Studio, read the
|
||||
# documentation: https://d.android.com/studio/projects/add-native-code.html
|
||||
|
||||
# Sets the minimum version of CMake required to build the native library.
|
||||
|
||||
cmake_minimum_required(VERSION 3.4.1)
|
||||
|
||||
# Creates and names a library, sets it as either STATIC or SHARED, and provides
|
||||
# the relative paths to its source code. You can define multiple libraries, and
|
||||
# CMake builds them for you. Gradle automatically packages shared libraries with
|
||||
# your APK.
|
||||
|
||||
set(PaddleLite_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../../PaddleLite")
|
||||
include_directories(${PaddleLite_DIR}/cxx/include)
|
||||
|
||||
set(OpenCV_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../../OpenCV/sdk/native/jni")
|
||||
message(STATUS "opencv dir: ${OpenCV_DIR}")
|
||||
find_package(OpenCV REQUIRED)
|
||||
message(STATUS "OpenCV libraries: ${OpenCV_LIBS}")
|
||||
include_directories(${OpenCV_INCLUDE_DIRS})
|
||||
aux_source_directory(. SOURCES)
|
||||
set(CMAKE_CXX_FLAGS
|
||||
"${CMAKE_CXX_FLAGS} -ffast-math -Ofast -Os"
|
||||
)
|
||||
set(CMAKE_CXX_FLAGS
|
||||
"${CMAKE_CXX_FLAGS} -fvisibility=hidden -fvisibility-inlines-hidden -fdata-sections -ffunction-sections"
|
||||
)
|
||||
set(CMAKE_SHARED_LINKER_FLAGS
|
||||
"${CMAKE_SHARED_LINKER_FLAGS} -Wl,--gc-sections -Wl,-z,nocopyreloc")
|
||||
|
||||
add_library(
|
||||
# Sets the name of the library.
|
||||
Native
|
||||
# Sets the library as a shared library.
|
||||
SHARED
|
||||
# Provides a relative path to your source file(s).
|
||||
${SOURCES})
|
||||
|
||||
find_library(
|
||||
# Sets the name of the path variable.
|
||||
log-lib
|
||||
# Specifies the name of the NDK library that you want CMake to locate.
|
||||
log)
|
||||
|
||||
add_library(
|
||||
# Sets the name of the library.
|
||||
paddle_light_api_shared
|
||||
# Sets the library as a shared library.
|
||||
SHARED
|
||||
# Provides a relative path to your source file(s).
|
||||
IMPORTED)
|
||||
|
||||
set_target_properties(
|
||||
# Specifies the target library.
|
||||
paddle_light_api_shared
|
||||
# Specifies the parameter you want to define.
|
||||
PROPERTIES
|
||||
IMPORTED_LOCATION
|
||||
${PaddleLite_DIR}/cxx/libs/${ANDROID_ABI}/libpaddle_light_api_shared.so
|
||||
# Provides the path to the library you want to import.
|
||||
)
|
||||
|
||||
|
||||
# Specifies libraries CMake should link to your target library. You can link
|
||||
# multiple libraries, such as libraries you define in this build script,
|
||||
# prebuilt third-party libraries, or system libraries.
|
||||
|
||||
target_link_libraries(
|
||||
# Specifies the target library.
|
||||
Native
|
||||
paddle_light_api_shared
|
||||
${OpenCV_LIBS}
|
||||
GLESv2
|
||||
EGL
|
||||
jnigraphics
|
||||
${log-lib}
|
||||
)
|
||||
|
||||
# After the Native target is built, copy each prebuilt runtime library shipped
# with Paddle Lite for the current ABI into CMAKE_LIBRARY_OUTPUT_DIRECTORY.
# One POST_BUILD command is registered per library, in the listed order.
foreach(runtime_lib
        libc++_shared.so
        libpaddle_light_api_shared.so
        libhiai.so
        libhiai_ir.so
        libhiai_ir_build.so)
  add_custom_command(
    TARGET Native
    POST_BUILD
    COMMAND
      ${CMAKE_COMMAND} -E copy
      ${PaddleLite_DIR}/cxx/libs/${ANDROID_ABI}/${runtime_lib}
      ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/${runtime_lib})
endforeach()
|
|
@ -0,0 +1,37 @@
|
|||
//
|
||||
// Created by fu on 4/25/18.
|
||||
//
|
||||
|
||||
#pragma once
|
||||
#include <functional>
#include <numeric>
#include <vector>
|
||||
|
||||
// Tag placed in front of every log line. It is defined before the platform
// switch because both the Android and the stdio variants of the macros below
// expand to an expression that uses it.
#define LOG_TAG "OCR_NDK"

#ifdef __ANDROID__

#include <android/log.h>

// Android: forward printf-style arguments to the Android log at the matching
// priority.
#define LOGI(...) __android_log_print(ANDROID_LOG_INFO, LOG_TAG, __VA_ARGS__)
#define LOGW(...) __android_log_print(ANDROID_LOG_WARN, LOG_TAG, __VA_ARGS__)
#define LOGE(...) __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, __VA_ARGS__)
#else
#include <stdio.h>

// Other platforms: plain stdio fallback. `format` must be a string literal
// because it is concatenated with the tag prefix and a trailing newline.
// Info and warnings go to stdout, errors go to stderr.
#define LOGI(format, ...)                                                      \
  fprintf(stdout, "[" LOG_TAG "]" format "\n", ##__VA_ARGS__)
#define LOGW(format, ...)                                                      \
  fprintf(stdout, "[" LOG_TAG "]" format "\n", ##__VA_ARGS__)
#define LOGE(format, ...)                                                      \
  fprintf(stderr, "[" LOG_TAG "]Error: " format "\n", ##__VA_ARGS__)
#endif
|
||||
|
||||
// Status codes. RETURN_OK (0) is the only value defined here.
enum RETURN_CODE { RETURN_OK = 0 };
|
||||
|
||||
// Identifiers for the kind of network to run.
// NOTE(review): the numeric values look arbitrary - confirm whether anything
// outside this header depends on them before changing them.
enum NET_TYPE { NET_OCR = 900100, NET_OCR_INTERNAL = 991008 };
|
||||
|
||||
// Returns the product of all elements of `vec`.
//
// An empty vector yields 0 rather than the mathematical empty product 1;
// that behaviour is kept unchanged for backward compatibility.
template <typename T> inline T product(const std::vector<T> &vec) {
  if (vec.empty()) {
    return 0;
  }
  // Seed with T(1), not the int literal 1: std::accumulate carries the running
  // value in the type of its initial argument, so an int seed would truncate
  // floating-point products and overflow for 64-bit element types.
  return std::accumulate(vec.begin(), vec.end(), static_cast<T>(1),
                         std::multiplies<T>());
}
|
|
@ -0,0 +1,114 @@
|
|||
//
|
||||
// Created by fujiayi on 2020/7/5.
|
||||
//
|
||||
|
||||
#include "native.h"
|
||||
#include "ocr_ppredictor.h"
|
||||
#include <algorithm>
|
||||
#include <paddle_api.h>
|
||||
#include <string>
|
||||
|
||||
static paddle::lite_api::PowerMode str_to_cpu_mode(const std::string &cpu_mode);
|
||||
|
||||
// JNI entry point exported for OCRPredictorNative.init.
//
// Converts the three model paths and the CPU mode name from Java strings,
// builds an OCR_Config from the thread count and the parsed power mode,
// creates an OCR_PPredictor on the heap and loads the models into it.
//
// Returns the predictor's address as a jlong handle. The object is destroyed
// by the matching release entry point.
extern "C" JNIEXPORT jlong JNICALL
Java_com_baidu_paddle_lite_demo_ocr_OCRPredictorNative_init(
    JNIEnv *env, jobject thiz, jstring j_det_model_path,
    jstring j_rec_model_path, jstring j_cls_model_path, jint j_thread_num,
    jstring j_cpu_mode) {
  std::string det_path = jstring_to_cpp_string(env, j_det_model_path);
  std::string rec_path = jstring_to_cpp_string(env, j_rec_model_path);
  std::string cls_path = jstring_to_cpp_string(env, j_cls_model_path);
  std::string mode_name = jstring_to_cpp_string(env, j_cpu_mode);

  ppredictor::OCR_Config config;
  config.thread_num = j_thread_num;
  config.mode = str_to_cpu_mode(mode_name);

  auto *predictor = new ppredictor::OCR_PPredictor{config};
  predictor->init_from_file(det_path, rec_path, cls_path);
  return reinterpret_cast<jlong>(predictor);
}
|
||||
|
||||
/**
|
||||
* "LITE_POWER_HIGH" convert to paddle::lite_api::LITE_POWER_HIGH
|
||||
* @param cpu_mode
|
||||
* @return
|
||||
*/
|
||||
static paddle::lite_api::PowerMode
|
||||
str_to_cpu_mode(const std::string &cpu_mode) {
|
||||
static std::map<std::string, paddle::lite_api::PowerMode> cpu_mode_map{
|
||||
{"LITE_POWER_HIGH", paddle::lite_api::LITE_POWER_HIGH},
|
||||
{"LITE_POWER_LOW", paddle::lite_api::LITE_POWER_HIGH},
|
||||
{"LITE_POWER_FULL", paddle::lite_api::LITE_POWER_FULL},
|
||||
{"LITE_POWER_NO_BIND", paddle::lite_api::LITE_POWER_NO_BIND},
|
||||
{"LITE_POWER_RAND_HIGH", paddle::lite_api::LITE_POWER_RAND_HIGH},
|
||||
{"LITE_POWER_RAND_LOW", paddle::lite_api::LITE_POWER_RAND_LOW}};
|
||||
std::string upper_key;
|
||||
std::transform(cpu_mode.cbegin(), cpu_mode.cend(), upper_key.begin(),
|
||||
::toupper);
|
||||
auto index = cpu_mode_map.find(upper_key);
|
||||
if (index == cpu_mode_map.end()) {
|
||||
LOGE("cpu_mode not found %s", upper_key.c_str());
|
||||
return paddle::lite_api::LITE_POWER_HIGH;
|
||||
} else {
|
||||
return index->second;
|
||||
}
|
||||
}
|
||||
|
||||
// JNI entry point exported for OCRPredictorNative.forward.
//
// Runs OCR inference on the predictor identified by `java_pointer`.
//
//   buf            input float data handed to infer_ocr
//   ddims          dimensions of `buf`, passed as floats and converted to
//                  int64_t here
//   original_image Android Bitmap, converted to a cv::Mat and passed on
//
// Returns a flat float array. For every result it holds, in order:
//   [point count, word-index count, score,
//    x0, y0, x1, y1, ...,            (one pair per point)
//    index0, index1, ...]            (one value per word index)
// An empty array is returned when the handle is null or the bitmap cannot be
// converted. nullptr is returned when the input array cannot be accessed.
extern "C" JNIEXPORT jfloatArray JNICALL
Java_com_baidu_paddle_lite_demo_ocr_OCRPredictorNative_forward(
    JNIEnv *env, jobject thiz, jlong java_pointer, jfloatArray buf,
    jfloatArray ddims, jobject original_image) {
  LOGI("begin to run native forward");
  if (java_pointer == 0) {
    LOGE("JAVA pointer is NULL");
    return cpp_array_to_jfloatarray(env, nullptr, 0);
  }

  cv::Mat origin = bitmap_to_cv_mat(env, original_image);
  // Mat::empty() is the actual emptiness test. The previous check,
  // `origin.size == 0`, compared MatSize's internal pointer with null and
  // therefore never fired.
  if (origin.empty()) {
    LOGE("origin bitmap cannot convert to CV Mat");
    return cpp_array_to_jfloatarray(env, nullptr, 0);
  }

  auto *predictor =
      reinterpret_cast<ppredictor::OCR_PPredictor *>(java_pointer);

  std::vector<float> dims_float_arr = jfloatarray_to_float_vector(env, ddims);
  std::vector<int64_t> dims_arr;
  dims_arr.resize(dims_float_arr.size());
  std::copy(dims_float_arr.cbegin(), dims_float_arr.cend(), dims_arr.begin());

  // The input buffer is fairly large, so it is accessed in place instead of
  // being copied into a std::vector with jfloatarray_to_float_vector.
  int64_t buf_len = static_cast<int64_t>(env->GetArrayLength(buf));
  jfloat *buf_data = env->GetFloatArrayElements(buf, nullptr);
  if (buf_data == nullptr) {
    // A Java exception is pending at this point, so return without making
    // further JNI calls.
    LOGE("cannot access the input float array");
    return nullptr;
  }

  std::vector<ppredictor::OCRPredictResult> results =
      predictor->infer_ocr(dims_arr, buf_data, buf_len, NET_OCR, origin);

  // Every Get<Type>ArrayElements must be paired with a release, otherwise the
  // array stays pinned or its native copy leaks. JNI_ABORT frees the buffer
  // without copying anything back, because the input is only read here.
  env->ReleaseFloatArrayElements(buf, buf_data, JNI_ABORT);

  LOGI("infer_ocr finished with boxes %zu", results.size());

  // Serialize the std::vector<ppredictor::OCRPredictResult> into a flat float
  // array. It is passed to the Java layer and deserialized there.
  std::vector<float> float_arr;
  for (const ppredictor::OCRPredictResult &r : results) {
    float_arr.push_back(static_cast<float>(r.points.size()));
    float_arr.push_back(static_cast<float>(r.word_index.size()));
    float_arr.push_back(r.score);
    for (const std::vector<int> &point : r.points) {
      float_arr.push_back(static_cast<float>(point.at(0)));
      float_arr.push_back(static_cast<float>(point.at(1)));
    }
    for (int index : r.word_index) {
      float_arr.push_back(static_cast<float>(index));
    }
  }
  return cpp_array_to_jfloatarray(env, float_arr.data(), float_arr.size());
}
|
||||
|
||||
// JNI entry point: destroys the native predictor whose address is carried in
// java_pointer. A zero handle is logged and otherwise ignored.
extern "C" JNIEXPORT void JNICALL
Java_com_baidu_paddle_lite_demo_ocr_OCRPredictorNative_release(
    JNIEnv *env, jobject thiz, jlong java_pointer) {
  if (!java_pointer) {
    LOGE("JAVA pointer is NULL");
    return;
  }
  // The handle is the raw address of an OCR_PPredictor.
  ppredictor::OCR_PPredictor *predictor =
      reinterpret_cast<ppredictor::OCR_PPredictor *>(java_pointer);
  delete predictor;
}
|
|
@ -0,0 +1,137 @@
|
|||
//
|
||||
// Created by fujiayi on 2020/7/5.
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common.h"
|
||||
#include <android/bitmap.h>
|
||||
#include <jni.h>
|
||||
#include <opencv2/opencv.hpp>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
// Converts a Java string into a UTF-8 encoded std::string.
//
// Java strings are UTF-16, while std::string is a plain byte sequence, so the
// conversion goes through String.getBytes("UTF-8") on the Java side.
// Returns an empty string when jstr is null or when the bytes cannot be
// obtained (a Java exception may be pending in the latter case).
inline std::string jstring_to_cpp_string(JNIEnv *env, jstring jstr) {
  if (!jstr) {
    return "";
  }
  const jclass stringClass = env->GetObjectClass(jstr);
  const jmethodID getBytes =
      env->GetMethodID(stringClass, "getBytes", "(Ljava/lang/String;)[B");
  // Keep a handle on the charset name so its local reference can be freed;
  // passing NewStringUTF(...) inline leaks one local ref per call.
  const jstring charsetName = env->NewStringUTF("UTF-8");
  const jbyteArray stringJbytes =
      (jbyteArray)env->CallObjectMethod(jstr, getBytes, charsetName);
  env->DeleteLocalRef(charsetName);
  env->DeleteLocalRef(stringClass);
  if (!stringJbytes) {
    // getBytes failed; do not dereference a null array.
    return "";
  }

  size_t length = (size_t)env->GetArrayLength(stringJbytes);
  jbyte *pBytes = env->GetByteArrayElements(stringJbytes, NULL);
  if (!pBytes) {
    env->DeleteLocalRef(stringJbytes);
    return "";
  }

  std::string ret = std::string(reinterpret_cast<char *>(pBytes), length);
  // The bytes were only read, so nothing has to be copied back.
  env->ReleaseByteArrayElements(stringJbytes, pBytes, JNI_ABORT);

  env->DeleteLocalRef(stringJbytes);
  return ret;
}
|
||||
|
||||
// Builds a Java String from the bytes of `str`, decoded as UTF-8 through the
// String(byte[], String charsetName) constructor. The length is taken with
// strlen, so the string is cut at the first NUL byte.
inline jstring cpp_string_to_jstring(JNIEnv *env, std::string str) {
  const char *utf8 = str.c_str();
  const size_t byte_count = strlen(utf8);

  jclass string_cls = env->FindClass("java/lang/String");
  jmethodID string_ctor =
      env->GetMethodID(string_cls, "<init>", "([BLjava/lang/String;)V");

  // Copy the raw bytes into a Java byte[].
  jbyteArray payload = env->NewByteArray(byte_count);
  env->SetByteArrayRegion(payload, 0, byte_count,
                          reinterpret_cast<const jbyte *>(utf8));

  jstring charset = env->NewStringUTF("UTF-8");
  jobject created = env->NewObject(string_cls, string_ctor, payload, charset);
  jstring res = (jstring)(created);

  // Drop the temporaries; only the resulting string is handed back.
  env->DeleteLocalRef(string_cls);
  env->DeleteLocalRef(charset);
  env->DeleteLocalRef(payload);

  return res;
}
|
||||
|
||||
// Copies `len` floats starting at `buf` into a newly created Java float[].
// With len == 0 an empty array is returned and `buf` is never touched, so it
// may be nullptr in that case.
inline jfloatArray cpp_array_to_jfloatarray(JNIEnv *env, const float *buf,
                                            int64_t len) {
  jfloatArray out = env->NewFloatArray(len);
  if (len != 0) {
    env->SetFloatArrayRegion(out, 0, len, buf);
  }
  return out;
}
|
||||
|
||||
// Copies `len` ints starting at `buf` into a newly created Java int[].
inline jintArray cpp_array_to_jintarray(JNIEnv *env, const int *buf,
                                        int64_t len) {
  jintArray out = env->NewIntArray(len);
  // Fill the whole array, starting at index 0.
  env->SetIntArrayRegion(out, 0, len, buf);
  return out;
}
|
||||
|
||||
// Copies `len` bytes starting at `buf` into a newly created Java byte[].
inline jbyteArray cpp_array_to_jbytearray(JNIEnv *env, const int8_t *buf,
                                          int64_t len) {
  jbyteArray out = env->NewByteArray(len);
  // Fill the whole array, starting at index 0.
  env->SetByteArrayRegion(out, 0, len, buf);
  return out;
}
|
||||
|
||||
// Copies a vector of int64_t into a newly created Java long[].
// Returns the result of NewLongArray unchanged when it is null (allocation
// failed) or when there is nothing to copy.
inline jlongArray int64_vector_to_jlongarray(JNIEnv *env,
                                             const std::vector<int64_t> &vec) {
  jlongArray result = env->NewLongArray(vec.size());
  if (result == nullptr || vec.empty()) {
    return result;
  }
  // Stage the values as jlong in a vector so the temporary buffer is freed
  // automatically instead of through a manual new[]/delete[] pair.
  std::vector<jlong> buf(vec.begin(), vec.end());
  env->SetLongArrayRegion(result, 0, buf.size(), buf.data());
  return result;
}
|
||||
|
||||
// Copies the contents of a Java long[] into a std::vector<int64_t>.
// Returns an empty vector when `data` is null or its elements cannot be
// accessed.
inline std::vector<int64_t> jlongarray_to_int64_vector(JNIEnv *env,
                                                       jlongArray data) {
  if (data == nullptr) {
    return {};
  }
  int data_size = env->GetArrayLength(data);
  jlong *data_ptr = env->GetLongArrayElements(data, nullptr);
  if (data_ptr == nullptr) {
    return {};
  }
  std::vector<int64_t> data_vec(data_ptr, data_ptr + data_size);
  // The elements were only read: JNI_ABORT frees the buffer without copying
  // it back into the Java array.
  env->ReleaseLongArrayElements(data, data_ptr, JNI_ABORT);
  return data_vec;
}
|
||||
|
||||
// Copies the contents of a Java float[] into a std::vector<float>.
// Returns an empty vector when `data` is null or its elements cannot be
// accessed.
inline std::vector<float> jfloatarray_to_float_vector(JNIEnv *env,
                                                      jfloatArray data) {
  if (data == nullptr) {
    return {};
  }
  int data_size = env->GetArrayLength(data);
  jfloat *data_ptr = env->GetFloatArrayElements(data, nullptr);
  if (data_ptr == nullptr) {
    return {};
  }
  std::vector<float> data_vec(data_ptr, data_ptr + data_size);
  // The elements were only read: JNI_ABORT frees the buffer without copying
  // it back into the Java array.
  env->ReleaseFloatArrayElements(data, data_ptr, JNI_ABORT);
  return data_vec;
}
|
||||
|
||||
// Converts an Android Bitmap (RGBA_8888 only) into a 3-channel BGR cv::Mat
// that owns its pixel data.
//
// Returns an empty cv::Mat when the bitmap info cannot be read, the format is
// not RGBA_8888, or the pixels cannot be locked.
inline cv::Mat bitmap_to_cv_mat(JNIEnv *env, jobject bitmap) {
  AndroidBitmapInfo info;
  int result = AndroidBitmap_getInfo(env, bitmap, &info);
  if (result != ANDROID_BITMAP_RESULT_SUCCESS) {
    LOGE("AndroidBitmap_getInfo failed, result: %d", result);
    return cv::Mat{};
  }
  if (info.format != ANDROID_BITMAP_FORMAT_RGBA_8888) {
    LOGE("Bitmap format is not RGBA_8888 !");
    return cv::Mat{};
  }

  void *srcData = NULL;
  result = AndroidBitmap_lockPixels(env, bitmap, &srcData);
  if (result != ANDROID_BITMAP_RESULT_SUCCESS || srcData == NULL) {
    // Without this check a failed lock would lead to reading from NULL.
    LOGE("AndroidBitmap_lockPixels failed, result: %d", result);
    return cv::Mat{};
  }

  // Wrap the locked pixels without copying. info.stride is the real distance
  // between rows in bytes and may be larger than width * 4, so it is passed
  // as the row step instead of assuming tightly packed rows.
  cv::Mat rgba(info.height, info.width, CV_8UC4, srcData, info.stride);
  // cvtColor writes into a freshly allocated Mat, so the result stays valid
  // after the bitmap is unlocked.
  cv::Mat mat;
  cv::cvtColor(rgba, mat, cv::COLOR_RGBA2BGR);
  AndroidBitmap_unlockPixels(env, bitmap);
  return mat;
}
|
|
@ -0,0 +1,544 @@
|
|||
/*******************************************************************************
|
||||
* *
|
||||
* Author : Angus Johnson *
|
||||
* Version : 6.4.2 *
|
||||
* Date : 27 February 2017 *
|
||||
* Website : http://www.angusj.com *
|
||||
* Copyright : Angus Johnson 2010-2017 *
|
||||
* *
|
||||
* License: *
|
||||
* Use, modification & distribution is subject to Boost Software License Ver 1. *
|
||||
* http://www.boost.org/LICENSE_1_0.txt *
|
||||
* *
|
||||
* Attributions: *
|
||||
* The code in this library is an extension of Bala Vatti's clipping algorithm: *
|
||||
* "A generic solution to polygon clipping" *
|
||||
* Communications of the ACM, Vol 35, Issue 7 (July 1992) pp 56-63. *
|
||||
* http://portal.acm.org/citation.cfm?id=129906 *
|
||||
* *
|
||||
* Computer graphics and geometric modeling: implementation and algorithms *
|
||||
* By Max K. Agoston *
|
||||
* Springer; 1 edition (January 4, 2005) *
|
||||
* http://books.google.com/books?q=vatti+clipping+agoston *
|
||||
* *
|
||||
* See also: *
|
||||
* "Polygon Offsetting by Computing Winding Numbers" *
|
||||
* Paper no. DETC2005-85513 pp. 565-575 *
|
||||
* ASME 2005 International Design Engineering Technical Conferences *
|
||||
* and Computers and Information in Engineering Conference (IDETC/CIE2005) *
|
||||
* September 24-28, 2005 , Long Beach, California, USA *
|
||||
* http://www.me.berkeley.edu/~mcmains/pubs/DAC05OffsetPolygon.pdf *
|
||||
* *
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef clipper_hpp
|
||||
#define clipper_hpp
|
||||
|
||||
#define CLIPPER_VERSION "6.4.2"
|
||||
|
||||
// use_int32: When enabled 32bit ints are used instead of 64bit ints. This
// improves performance but coordinate values are limited to the range +/- 46340
|
||||
//#define use_int32
|
||||
|
||||
// use_xyz: adds a Z member to IntPoint. Adds a minor cost to performance.
|
||||
//#define use_xyz
|
||||
|
||||
// use_lines: Enables line clipping. Adds a very minor cost to performance.
|
||||
#define use_lines
|
||||
|
||||
// use_deprecated: Enables temporary support for the obsolete functions
|
||||
//#define use_deprecated
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <functional>
|
||||
#include <list>
|
||||
#include <ostream>
|
||||
#include <queue>
|
||||
#include <set>
|
||||
#include <stdexcept>
|
||||
#include <vector>
|
||||
|
||||
namespace ClipperLib {
|
||||
|
||||
enum ClipType { ctIntersection, ctUnion, ctDifference, ctXor };
|
||||
enum PolyType { ptSubject, ptClip };
|
||||
// By far the most widely used winding rules for polygon filling are
|
||||
// EvenOdd & NonZero (GDI, GDI+, XLib, OpenGL, Cairo, AGG, Quartz, SVG, Gr32)
|
||||
// Others rules include Positive, Negative and ABS_GTR_EQ_TWO (only in OpenGL)
|
||||
// see http://glprogramming.com/red/chapter11.html
|
||||
enum PolyFillType { pftEvenOdd, pftNonZero, pftPositive, pftNegative };
|
||||
|
||||
#ifdef use_int32
|
||||
typedef int cInt;
|
||||
static cInt const loRange = 0x7FFF;
|
||||
static cInt const hiRange = 0x7FFF;
|
||||
#else
|
||||
typedef signed long long cInt;
|
||||
static cInt const loRange = 0x3FFFFFFF;
|
||||
static cInt const hiRange = 0x3FFFFFFFFFFFFFFFLL;
|
||||
typedef signed long long long64; // used by Int128 class
|
||||
typedef unsigned long long ulong64;
|
||||
|
||||
#endif
|
||||
|
||||
struct IntPoint {
|
||||
cInt X;
|
||||
cInt Y;
|
||||
#ifdef use_xyz
|
||||
cInt Z;
|
||||
IntPoint(cInt x = 0, cInt y = 0, cInt z = 0) : X(x), Y(y), Z(z){};
|
||||
#else
|
||||
|
||||
IntPoint(cInt x = 0, cInt y = 0) : X(x), Y(y){};
|
||||
#endif
|
||||
|
||||
friend inline bool operator==(const IntPoint &a, const IntPoint &b) {
|
||||
return a.X == b.X && a.Y == b.Y;
|
||||
}
|
||||
|
||||
friend inline bool operator!=(const IntPoint &a, const IntPoint &b) {
|
||||
return a.X != b.X || a.Y != b.Y;
|
||||
}
|
||||
};
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
typedef std::vector<IntPoint> Path;
|
||||
typedef std::vector<Path> Paths;
|
||||
|
||||
// Appends point `p` to `poly` and returns `poly`, so calls can be chained
// (path << p1 << p2).
inline Path &operator<<(Path &poly, const IntPoint &p) {
  poly.push_back(p);
  return poly;
}
|
||||
|
||||
// Appends a copy of path `p` to `polys` and returns `polys`, so calls can be
// chained.
inline Paths &operator<<(Paths &polys, const Path &p) {
  polys.push_back(p);
  return polys;
}
|
||||
|
||||
std::ostream &operator<<(std::ostream &s, const IntPoint &p);
|
||||
|
||||
std::ostream &operator<<(std::ostream &s, const Path &p);
|
||||
|
||||
std::ostream &operator<<(std::ostream &s, const Paths &p);
|
||||
|
||||
// Point with double-precision coordinates.
struct DoublePoint {
  double X;
  double Y;

  DoublePoint(double x = 0, double y = 0) : X(x), Y(y) {}

  // Not explicit, so an IntPoint converts implicitly; both coordinates are
  // cast to double.
  DoublePoint(IntPoint ip) : X((double)ip.X), Y((double)ip.Y) {}
};
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
#ifdef use_xyz
|
||||
typedef void (*ZFillCallback)(IntPoint &e1bot, IntPoint &e1top, IntPoint &e2bot,
|
||||
IntPoint &e2top, IntPoint &pt);
|
||||
#endif
|
||||
|
||||
enum InitOptions {
|
||||
ioReverseSolution = 1,
|
||||
ioStrictlySimple = 2,
|
||||
ioPreserveCollinear = 4
|
||||
};
|
||||
enum JoinType { jtSquare, jtRound, jtMiter };
|
||||
enum EndType {
|
||||
etClosedPolygon,
|
||||
etClosedLine,
|
||||
etOpenButt,
|
||||
etOpenSquare,
|
||||
etOpenRound
|
||||
};
|
||||
|
||||
class PolyNode;
|
||||
|
||||
typedef std::vector<PolyNode *> PolyNodes;
|
||||
|
||||
// A node of a polygon tree: it carries one contour plus links to its parent
// and child nodes. Clipper and ClipperOffset are friends and may access the
// private members.
class PolyNode {
public:
  PolyNode();

  virtual ~PolyNode(){};
  Path Contour;     // the path stored in this node
  PolyNodes Childs; // pointers to the child nodes
  PolyNode *Parent; // pointer to the parent node

  PolyNode *GetNext() const;

  bool IsHole() const;

  bool IsOpen() const;

  int ChildCount() const;

private:
  // PolyNode& operator =(PolyNode& other);
  unsigned Index; // node index in Parent.Childs
  bool m_IsOpen;
  JoinType m_jointype;
  EndType m_endtype;

  PolyNode *GetNextSiblingUp() const;

  void AddChild(PolyNode &child);

  friend class Clipper; // to access Index
  friend class ClipperOffset;
};
|
||||
|
||||
// Root of a PolyNode hierarchy. The destructor calls Clear().
class PolyTree : public PolyNode {
public:
  ~PolyTree() { Clear(); };

  PolyNode *GetFirst() const;

  void Clear();

  int Total() const;

private:
  // PolyTree& operator =(PolyTree& other);
  PolyNodes AllNodes; // node pointers; Clipper is a friend so it can fill them

  friend class Clipper; // to access AllNodes
};
|
||||
|
||||
bool Orientation(const Path &poly);
|
||||
|
||||
double Area(const Path &poly);
|
||||
|
||||
int PointInPolygon(const IntPoint &pt, const Path &path);
|
||||
|
||||
void SimplifyPolygon(const Path &in_poly, Paths &out_polys,
|
||||
PolyFillType fillType = pftEvenOdd);
|
||||
|
||||
void SimplifyPolygons(const Paths &in_polys, Paths &out_polys,
|
||||
PolyFillType fillType = pftEvenOdd);
|
||||
|
||||
void SimplifyPolygons(Paths &polys, PolyFillType fillType = pftEvenOdd);
|
||||
|
||||
void CleanPolygon(const Path &in_poly, Path &out_poly, double distance = 1.415);
|
||||
|
||||
void CleanPolygon(Path &poly, double distance = 1.415);
|
||||
|
||||
void CleanPolygons(const Paths &in_polys, Paths &out_polys,
|
||||
double distance = 1.415);
|
||||
|
||||
void CleanPolygons(Paths &polys, double distance = 1.415);
|
||||
|
||||
void MinkowskiSum(const Path &pattern, const Path &path, Paths &solution,
|
||||
bool pathIsClosed);
|
||||
|
||||
void MinkowskiSum(const Path &pattern, const Paths &paths, Paths &solution,
|
||||
bool pathIsClosed);
|
||||
|
||||
void MinkowskiDiff(const Path &poly1, const Path &poly2, Paths &solution);
|
||||
|
||||
void PolyTreeToPaths(const PolyTree &polytree, Paths &paths);
|
||||
|
||||
void ClosedPathsFromPolyTree(const PolyTree &polytree, Paths &paths);
|
||||
|
||||
void OpenPathsFromPolyTree(PolyTree &polytree, Paths &paths);
|
||||
|
||||
void ReversePath(Path &p);
|
||||
|
||||
void ReversePaths(Paths &p);
|
||||
|
||||
// Axis-aligned rectangle with cInt edges; returned by
// ClipperBase::GetBounds().
struct IntRect {
  cInt left;
  cInt top;
  cInt right;
  cInt bottom;
};
|
||||
|
||||
// enums that are used internally ...
|
||||
enum EdgeSide { esLeft = 1, esRight = 2 };
|
||||
|
||||
// forward declarations (for stuff used internally) ...
|
||||
struct TEdge;
|
||||
struct IntersectNode;
|
||||
struct LocalMinimum;
|
||||
struct OutPt;
|
||||
struct OutRec;
|
||||
struct Join;
|
||||
|
||||
typedef std::vector<OutRec *> PolyOutList;
|
||||
typedef std::vector<TEdge *> EdgeList;
|
||||
typedef std::vector<Join *> JoinList;
|
||||
typedef std::vector<IntersectNode *> IntersectList;
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
// ClipperBase is the ancestor to the Clipper class. It should not be
|
||||
// instantiated directly. This class simply abstracts the conversion of sets of
|
||||
// polygon coordinates into edge objects that are stored in a LocalMinima list.
|
||||
// Base class of Clipper: stores the added paths as edges and local minima and
// owns the scanbeam queue and output records declared below.
class ClipperBase {
public:
  ClipperBase();

  virtual ~ClipperBase();

  // Adds one path (or a set of paths) as subject or clip geometry; `Closed`
  // tells whether the path is a closed polygon.
  virtual bool AddPath(const Path &pg, PolyType PolyTyp, bool Closed);

  bool AddPaths(const Paths &ppg, PolyType PolyTyp, bool Closed);

  virtual void Clear();

  IntRect GetBounds();

  // Getter/setter pair for the m_PreserveCollinear flag.
  bool PreserveCollinear() { return m_PreserveCollinear; };

  void PreserveCollinear(bool value) { m_PreserveCollinear = value; };

protected:
  void DisposeLocalMinimaList();

  TEdge *AddBoundsToLML(TEdge *e, bool IsClosed);

  virtual void Reset();

  TEdge *ProcessBound(TEdge *E, bool IsClockwise);

  void InsertScanbeam(const cInt Y);

  bool PopScanbeam(cInt &Y);

  bool LocalMinimaPending();

  bool PopLocalMinima(cInt Y, const LocalMinimum *&locMin);

  OutRec *CreateOutRec();

  void DisposeAllOutRecs();

  void DisposeOutRec(PolyOutList::size_type index);

  // Helpers for the edge list headed by m_ActiveEdges
  // (presumably "AEL" = active edge list).
  void SwapPositionsInAEL(TEdge *edge1, TEdge *edge2);

  void DeleteFromAEL(TEdge *e);

  void UpdateEdgeIntoAEL(TEdge *&e);

  typedef std::vector<LocalMinimum> MinimaList;
  MinimaList::iterator m_CurrentLM; // iterator into m_MinimaList
  MinimaList m_MinimaList;

  bool m_UseFullRange;
  EdgeList m_edges;
  bool m_PreserveCollinear;
  bool m_HasOpenPaths;
  PolyOutList m_PolyOuts;
  TEdge *m_ActiveEdges;

  // std::priority_queue pops its largest element first.
  typedef std::priority_queue<cInt> ScanbeamList;
  ScanbeamList m_Scanbeam;
};
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
// Performs the boolean clipping operations named by ClipType on the paths
// added through the ClipperBase interface. Results are written either to a
// flat Paths list or to a PolyTree.
class Clipper : public virtual ClipperBase {
public:
  // `initOptions` is presumably a bitwise OR of InitOptions values - confirm
  // against the implementation.
  Clipper(int initOptions = 0);

  // Execute overloads: the result goes to `solution` / `polytree`. The
  // shorter forms take one fill type, the longer forms take separate fill
  // types for subject and clip paths.
  bool Execute(ClipType clipType, Paths &solution,
               PolyFillType fillType = pftEvenOdd);

  bool Execute(ClipType clipType, Paths &solution, PolyFillType subjFillType,
               PolyFillType clipFillType);

  bool Execute(ClipType clipType, PolyTree &polytree,
               PolyFillType fillType = pftEvenOdd);

  bool Execute(ClipType clipType, PolyTree &polytree, PolyFillType subjFillType,
               PolyFillType clipFillType);

  // Getter/setter pairs for the m_ReverseOutput and m_StrictSimple flags.
  bool ReverseSolution() { return m_ReverseOutput; };

  void ReverseSolution(bool value) { m_ReverseOutput = value; };

  bool StrictlySimple() { return m_StrictSimple; };

  void StrictlySimple(bool value) { m_StrictSimple = value; };
// set the callback function for z value filling on intersections (otherwise Z
// is 0)
#ifdef use_xyz
  void ZFillFunction(ZFillCallback zFillFunc);
#endif
protected:
  virtual bool ExecuteInternal();

private:
  // Internal state of a clipping run.
  JoinList m_Joins;
  JoinList m_GhostJoins;
  IntersectList m_IntersectList;
  ClipType m_ClipType;
  typedef std::list<cInt> MaximaList;
  MaximaList m_Maxima;
  TEdge *m_SortedEdges;
  bool m_ExecuteLocked;
  PolyFillType m_ClipFillType;
  PolyFillType m_SubjFillType;
  bool m_ReverseOutput;
  bool m_UsingPolyTree;
  bool m_StrictSimple;
#ifdef use_xyz
  ZFillCallback m_ZFill; // custom callback
#endif

  // Internal helpers; their definitions are outside this header.
  void SetWindingCount(TEdge &edge);

  bool IsEvenOddFillType(const TEdge &edge) const;

  bool IsEvenOddAltFillType(const TEdge &edge) const;

  void InsertLocalMinimaIntoAEL(const cInt botY);

  void InsertEdgeIntoAEL(TEdge *edge, TEdge *startEdge);

  void AddEdgeToSEL(TEdge *edge);

  bool PopEdgeFromSEL(TEdge *&edge);

  void CopyAELToSEL();

  void DeleteFromSEL(TEdge *e);

  void SwapPositionsInSEL(TEdge *edge1, TEdge *edge2);

  bool IsContributing(const TEdge &edge) const;

  bool IsTopHorz(const cInt XPos);

  void DoMaxima(TEdge *e);

  void ProcessHorizontals();

  void ProcessHorizontal(TEdge *horzEdge);

  void AddLocalMaxPoly(TEdge *e1, TEdge *e2, const IntPoint &pt);

  OutPt *AddLocalMinPoly(TEdge *e1, TEdge *e2, const IntPoint &pt);

  OutRec *GetOutRec(int idx);

  void AppendPolygon(TEdge *e1, TEdge *e2);

  void IntersectEdges(TEdge *e1, TEdge *e2, IntPoint &pt);

  OutPt *AddOutPt(TEdge *e, const IntPoint &pt);

  OutPt *GetLastOutPt(TEdge *e);

  bool ProcessIntersections(const cInt topY);

  void BuildIntersectList(const cInt topY);

  void ProcessIntersectList();

  void ProcessEdgesAtTopOfScanbeam(const cInt topY);

  void BuildResult(Paths &polys);

  void BuildResult2(PolyTree &polytree);

  void SetHoleState(TEdge *e, OutRec *outrec);

  void DisposeIntersectNodes();

  bool FixupIntersectionOrder();

  void FixupOutPolygon(OutRec &outrec);

  void FixupOutPolyline(OutRec &outrec);

  bool IsHole(TEdge *e);

  bool FindOwnerFromSplitRecs(OutRec &outRec, OutRec *&currOrfl);

  void FixHoleLinkage(OutRec &outrec);

  void AddJoin(OutPt *op1, OutPt *op2, const IntPoint offPt);

  void ClearJoins();

  void ClearGhostJoins();

  void AddGhostJoin(OutPt *op, const IntPoint offPt);

  bool JoinPoints(Join *j, OutRec *outRec1, OutRec *outRec2);

  void JoinCommonEdges();

  void DoSimplePolygons();

  void FixupFirstLefts1(OutRec *OldOutRec, OutRec *NewOutRec);

  void FixupFirstLefts2(OutRec *InnerOutRec, OutRec *OuterOutRec);

  void FixupFirstLefts3(OutRec *OldOutRec, OutRec *NewOutRec);

#ifdef use_xyz
  void SetZ(IntPoint &pt, TEdge &e1, TEdge &e2);
#endif
};
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
// Offsets paths by a distance `delta`. Paths are added together with a join
// type and an end type, then Execute() writes the offset result to a Paths
// list or a PolyTree.
class ClipperOffset {
public:
  ClipperOffset(double miterLimit = 2.0, double roundPrecision = 0.25);

  ~ClipperOffset();

  void AddPath(const Path &path, JoinType joinType, EndType endType);

  void AddPaths(const Paths &paths, JoinType joinType, EndType endType);

  void Execute(Paths &solution, double delta);

  void Execute(PolyTree &solution, double delta);

  void Clear();

  // Public tuning values. NOTE(review): presumably initialised from the
  // constructor's miterLimit / roundPrecision arguments - confirm in the
  // implementation file.
  double MiterLimit;
  double ArcTolerance;

private:
  // Working state used while offsetting.
  Paths m_destPolys;
  Path m_srcPoly;
  Path m_destPoly;
  std::vector<DoublePoint> m_normals;
  double m_delta, m_sinA, m_sin, m_cos;
  double m_miterLim, m_StepsPerRad;
  IntPoint m_lowest;
  PolyNode m_polyNodes;

  void FixOrientations();

  void DoOffset(double delta);

  void OffsetPoint(int j, int &k, JoinType jointype);

  // One helper per JoinType value (jtSquare, jtMiter, jtRound).
  void DoSquare(int j, int k);

  void DoMiter(int j, int k, double r);

  void DoRound(int j, int k);
};
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
// Exception type of the library: stores a copy of the description and
// returns it from what().
// NOTE(review): uses std::string although this header does not include
// <string> directly - it relies on another header pulling it in.
class clipperException : public std::exception {
public:
  clipperException(const char *description) : m_descr(description) {}

  virtual ~clipperException() throw() {}

  // The returned pointer refers to m_descr and is valid while this object
  // lives.
  virtual const char *what() const throw() { return m_descr.c_str(); }

private:
  std::string m_descr;
};
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
} // ClipperLib namespace
|
||||
|
||||
#endif // clipper_hpp
|
|
@ -0,0 +1,46 @@
|
|||
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "ocr_cls_process.h"
|
||||
#include <cmath>
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
|
||||
const std::vector<int> CLS_IMAGE_SHAPE = {3, 48, 192};
|
||||
|
||||
// Resizes `img` to the classifier input height (CLS_IMAGE_SHAPE[1]) while
// keeping the aspect ratio, capping the width at CLS_IMAGE_SHAPE[2]. Images
// narrower than that width are padded on the right with black pixels.
cv::Mat cls_resize_img(const cv::Mat &img) {
  const int target_h = CLS_IMAGE_SHAPE[1];
  const int target_w = CLS_IMAGE_SHAPE[2];

  // Width the image would get at the target height, rounded up.
  const float aspect = float(img.cols) / float(img.rows);
  const float scaled_w = ceilf(target_h * aspect);
  const int resize_w = (scaled_w > target_w) ? target_w : int(scaled_w);

  cv::Mat resized;
  cv::resize(img, resized, cv::Size(resize_w, target_h), 0.f, 0.f,
             cv::INTER_CUBIC);

  if (resize_w < target_w) {
    // Pad only on the right side up to the full target width.
    cv::copyMakeBorder(resized, resized, 0, 0, 0, int(target_w - resize_w),
                       cv::BORDER_CONSTANT, {0, 0, 0});
  }
  return resized;
}
|
|
@ -0,0 +1,23 @@
|
|||
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common.h"
|
||||
#include <opencv2/opencv.hpp>
|
||||
#include <vector>
|
||||
|
||||
extern const std::vector<int> CLS_IMAGE_SHAPE;
|
||||
|
||||
cv::Mat cls_resize_img(const cv::Mat &img);
|
|
@ -0,0 +1,142 @@
|
|||
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "ocr_crnn_process.h"
|
||||
#include <cmath>
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
|
||||
const std::string CHARACTER_TYPE = "ch";
|
||||
const int MAX_DICT_LENGTH = 6624;
|
||||
const std::vector<int> REC_IMAGE_SHAPE = {3, 32, 320};
|
||||
|
||||
// Resizes `img` to the recognizer input height, normalizes every channel to
// (x / 255 - 0.5) * 2 and pads the result on the right with zeros up to the
// target width. For CHARACTER_TYPE "ch" the target width is 32 * wh_ratio
// instead of REC_IMAGE_SHAPE[2].
static cv::Mat crnn_resize_norm_img(cv::Mat img, float wh_ratio) {
  const int target_h = REC_IMAGE_SHAPE[1];
  int target_w = REC_IMAGE_SHAPE[2];
  if (CHARACTER_TYPE == "ch") {
    target_w = int(32 * wh_ratio);
  }

  // Width at the target height, rounded up and capped at the target width.
  const float aspect = float(img.cols) / float(img.rows);
  const float scaled_w = ceilf(target_h * aspect);
  const int resize_w = (scaled_w > target_w) ? target_w : int(scaled_w);

  cv::Mat resize_img;
  cv::resize(img, resize_img, cv::Size(resize_w, target_h), 0.f, 0.f,
             cv::INTER_CUBIC);

  // Scale to float first, then normalize channel by channel.
  resize_img.convertTo(resize_img, CV_32FC3, 1 / 255.f);
  for (int row = 0; row < resize_img.rows; row++) {
    for (int col = 0; col < resize_img.cols; col++) {
      cv::Vec3f &px = resize_img.at<cv::Vec3f>(row, col);
      for (int c = 0; c < 3; c++) {
        px[c] = (px[c] - 0.5) * 2;
      }
    }
  }

  cv::Mat dist;
  cv::copyMakeBorder(resize_img, dist, 0, 0, 0, int(target_w - resize_w),
                     cv::BORDER_CONSTANT, {0, 0, 0});

  return dist;
}
|
||||
|
||||
// Resizes `img` to the recognizer input height (REC_IMAGE_SHAPE[1]) while
// keeping the aspect ratio, capping the width at the target width. For
// CHARACTER_TYPE "ch" the target width is 32 * wh_ratio instead of
// REC_IMAGE_SHAPE[2]. No normalization or padding is applied.
cv::Mat crnn_resize_img(const cv::Mat &img, float wh_ratio) {
  const int target_h = REC_IMAGE_SHAPE[1];
  int target_w = REC_IMAGE_SHAPE[2];
  if (CHARACTER_TYPE == "ch") {
    target_w = int(32 * wh_ratio);
  }

  // Width at the target height, rounded up and capped at the target width.
  const float aspect = float(img.cols) / float(img.rows);
  const float scaled_w = ceilf(target_h * aspect);
  const int resize_w = (scaled_w > target_w) ? target_w : int(scaled_w);

  cv::Mat resized;
  cv::resize(img, resized, cv::Size(resize_w, target_h));
  return resized;
}
|
||||
|
||||
// Cuts the quadrilateral `box` out of `srcimage` and rectifies it to an
// upright rectangle with a perspective warp.
//
// `box` holds four {x, y} corners; the code uses corner 0->1 as the width
// edge and corner 0->3 as the height edge. When the rectified patch is at
// least 1.5 times taller than wide it is transposed and flipped vertically
// before being returned.
cv::Mat get_rotate_crop_image(const cv::Mat &srcimage,
                              const std::vector<std::vector<int>> &box) {
  std::vector<std::vector<int>> points = box;

  // Axis-aligned bounding rectangle of the four corners.
  int x_collect[4] = {box[0][0], box[1][0], box[2][0], box[3][0]};
  int y_collect[4] = {box[0][1], box[1][1], box[2][1], box[3][1]};
  int left = int(*std::min_element(x_collect, x_collect + 4));
  int right = int(*std::max_element(x_collect, x_collect + 4));
  int top = int(*std::min_element(y_collect, y_collect + 4));
  int bottom = int(*std::max_element(y_collect, y_collect + 4));

  cv::Mat img_crop;
  srcimage(cv::Rect(left, top, right - left, bottom - top)).copyTo(img_crop);

  // Express the corners relative to the cropped patch.
  for (size_t i = 0; i < points.size(); i++) {
    points[i][0] -= left;
    points[i][1] -= top;
  }

  int img_crop_width = int(sqrt(pow(points[0][0] - points[1][0], 2) +
                                pow(points[0][1] - points[1][1], 2)));
  int img_crop_height = int(sqrt(pow(points[0][0] - points[3][0], 2) +
                                 pow(points[0][1] - points[3][1], 2)));

  // Destination corners: an upright rectangle of the measured size.
  cv::Point2f pts_std[4];
  pts_std[0] = cv::Point2f(0., 0.);
  pts_std[1] = cv::Point2f(img_crop_width, 0.);
  pts_std[2] = cv::Point2f(img_crop_width, img_crop_height);
  pts_std[3] = cv::Point2f(0.f, img_crop_height);

  cv::Point2f pointsf[4];
  pointsf[0] = cv::Point2f(points[0][0], points[0][1]);
  pointsf[1] = cv::Point2f(points[1][0], points[1][1]);
  pointsf[2] = cv::Point2f(points[2][0], points[2][1]);
  pointsf[3] = cv::Point2f(points[3][0], points[3][1]);

  cv::Mat M = cv::getPerspectiveTransform(pointsf, pts_std);

  cv::Mat dst_img;
  // warpPerspective takes (src, dst, M, dsize, flags, borderMode). The border
  // mode used to be passed in the `flags` slot, where its value is read as an
  // interpolation flag, so replication was never applied. Interpolation stays
  // linear; BORDER_REPLICATE now goes into the borderMode slot.
  cv::warpPerspective(img_crop, dst_img, M,
                      cv::Size(img_crop_width, img_crop_height),
                      cv::INTER_LINEAR, cv::BORDER_REPLICATE);

  if (float(dst_img.rows) >= float(dst_img.cols) * 1.5) {
    // Tall patch: turn it so the long side becomes the width.
    cv::transpose(dst_img, dst_img);
    cv::flip(dst_img, dst_img, 0);
    return dst_img;
  } else {
    return dst_img;
  }
}
|
|
@ -0,0 +1,20 @@
|
|||
//
|
||||
// Created by fujiayi on 2020/7/3.
|
||||
//
|
||||
#pragma once
|
||||
|
||||
#include "common.h"
|
||||
#include <opencv2/opencv.hpp>
|
||||
#include <vector>
|
||||
|
||||
extern const std::vector<int> REC_IMAGE_SHAPE;
|
||||
|
||||
cv::Mat get_rotate_crop_image(const cv::Mat &srcimage,
|
||||
const std::vector<std::vector<int>> &box);
|
||||
|
||||
cv::Mat crnn_resize_img(const cv::Mat &img, float wh_ratio);
|
||||
|
||||
// Returns the zero-based position of the largest element in [first, last).
// For equal maxima the first one wins; an empty range yields 0.
template <class ForwardIterator>
inline size_t argmax(ForwardIterator first, ForwardIterator last) {
  ForwardIterator largest = std::max_element(first, last);
  return std::distance(first, largest);
}
|
|
@ -0,0 +1,342 @@
|
|||
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "ocr_clipper.hpp"
|
||||
#include "opencv2/core.hpp"
|
||||
#include "opencv2/imgcodecs.hpp"
|
||||
#include "opencv2/imgproc.hpp"
|
||||
#include <iostream>
|
||||
#include <math.h>
|
||||
#include <vector>
|
||||
|
||||
// Computes the unclip offset of a quadrilateral as
// area * unclip_ratio / perimeter.
//   box          - four points; box[i][0] is x and box[i][1] is y
//   unclip_ratio - multiplier applied to the area/perimeter ratio
//   distance     - output parameter receiving the offset
// The area comes from the shoelace formula and is taken as an absolute value,
// so the winding direction of the points does not matter.
static void getcontourarea(float **box, float unclip_ratio, float &distance) {
  const int kNumPoints = 4;
  float twice_signed_area = 0.0f;
  float perimeter = 0.0f;
  for (int cur = 0; cur < kNumPoints; cur++) {
    const int next = (cur + 1) % kNumPoints;
    const float dx = box[cur][0] - box[next][0];
    const float dy = box[cur][1] - box[next][1];
    twice_signed_area +=
        box[cur][0] * box[next][1] - box[cur][1] * box[next][0];
    perimeter += sqrtf(dx * dx + dy * dy);
  }
  const float area = fabs(float(twice_signed_area / 2.0));

  distance = area * unclip_ratio / perimeter;
}
|
||||
|
||||
// Expands ("unclips") a 4-point box outward with Clipper polygon offsetting
// and returns the minimum-area rotated rectangle around the offset result.
//   box - four points; box[i][0] is x and box[i][1] is y. Coordinates are
//         truncated to int before being handed to Clipper.
// The offset distance is derived from the box by getcontourarea() with an
// unclip ratio of 2.0.
static cv::RotatedRect unclip(float **box) {
  const float unclip_ratio = 2.0;
  float distance = 1.0;

  getcontourarea(box, unclip_ratio, distance);

  ClipperLib::Path path;
  for (int k = 0; k < 4; k++) {
    path << ClipperLib::IntPoint(int(box[k][0]), int(box[k][1]));
  }

  ClipperLib::ClipperOffset offset;
  offset.AddPath(path, ClipperLib::jtRound, ClipperLib::etClosedPolygon);

  ClipperLib::Paths soln;
  offset.Execute(soln, distance);

  // Collect the vertices of every returned path. Each path is walked with its
  // own length: bounding the inner loop by the size of the last path would
  // read past the end of shorter paths and drop vertices of longer ones.
  std::vector<cv::Point2f> points;
  for (size_t j = 0; j < soln.size(); j++) {
    for (size_t i = 0; i < soln[j].size(); i++) {
      points.emplace_back(soln[j][i].X, soln[j][i].Y);
    }
  }

  // Offsetting may produce no polygon at all. Return a tiny 1x1 rectangle at
  // the origin rather than asking minAreaRect for the hull of an empty set.
  if (points.empty()) {
    return cv::RotatedRect(cv::Point2f(0.f, 0.f), cv::Size2f(1.f, 1.f), 0.f);
  }

  return cv::minAreaRect(points);
}
|
||||
|
||||
// Copies a matrix (read element-wise as float) into a freshly allocated array
// of row pointers, so that result[r][c] == mat.at<float>(r, c).
// Both the outer array and each row are allocated with new[]; this function
// never releases them, so ownership passes to the caller.
static float **Mat2Vec(cv::Mat mat) {
  float **rows = new float *[mat.rows];
  for (int r = 0; r < mat.rows; ++r) {
    rows[r] = new float[mat.cols];
  }
  for (int r = 0; r < mat.rows; ++r) {
    float *row = rows[r];
    for (int c = 0; c < mat.cols; ++c) {
      row[c] = mat.at<float>(r, c);
    }
  }

  return rows;
}
|
||||
|
||||
// In-place quicksort of the row pointers s[l..r] (inclusive), ascending by the
// first element of each row (s[k][0]). Only the pointers are rearranged; the
// rows themselves are not copied. The sort is not stable.
//   s - array of row pointers
//   l - index of the first element of the range to sort
//   r - index of the last element of the range to sort
static void quickSort(float **s, int l, int r) {
  if (l < r) {
    int i = l, j = r;
    // The first element of the range is the pivot: x is its key, xp its row.
    float x = s[l][0];
    float *xp = s[l];
    while (i < j) {
      // Scan from the right for an element smaller than the pivot.
      while (i < j && s[j][0] >= x) {
        j--;
      }
      if (i < j) {
        // Swapping moves the pivot row to position j.
        std::swap(s[i++], s[j]);
      }
      // Scan from the left for an element not smaller than the pivot.
      while (i < j && s[i][0] < x) {
        i++;
      }
      if (i < j) {
        // Swapping moves the pivot row back to position i.
        std::swap(s[j--], s[i]);
      }
    }
    // i == j here; the pivot row is written to its final position.
    s[i] = xp;
    quickSort(s, l, i - 1);
    quickSort(s, i + 1, r);
  }
}
|
||||
|
||||
// In-place quicksort of box[l..r] (inclusive), ascending by box[k][axis].
// The sort is not stable.
//   box  - list of points, each a vector of coordinates
//   l    - index of the first element of the range to sort
//   r    - index of the last element of the range to sort
//   axis - index of the coordinate used as the sort key
static void quickSort_vector(std::vector<std::vector<int>> &box, int l, int r,
                             int axis) {
  if (l < r) {
    int i = l, j = r;
    // The first element of the range is the pivot: x is its key, xp a copy.
    int x = box[l][axis];
    std::vector<int> xp(box[l]);
    while (i < j) {
      // Scan from the right for an element smaller than the pivot.
      while (i < j && box[j][axis] >= x) {
        j--;
      }
      if (i < j) {
        std::swap(box[i++], box[j]);
      }
      // Scan from the left for an element not smaller than the pivot.
      while (i < j && box[i][axis] < x) {
        i++;
      }
      if (i < j) {
        std::swap(box[j--], box[i]);
      }
    }
    // i == j here; the saved pivot is written to its final position.
    box[i] = xp;
    quickSort_vector(box, l, i - 1, axis);
    quickSort_vector(box, i + 1, r, axis);
  }
}
|
||||
|
||||
static std::vector<std::vector<int>>
|
||||
order_points_clockwise(std::vector<std::vector<int>> pts) {
|
||||
std::vector<std::vector<int>> box = pts;
|
||||
quickSort_vector(box, 0, int(box.size() - 1), 0);
|
||||
std::vector<std::vector<int>> leftmost = {box[0], box[1]};
|
||||
std::vector<std::vector<int>> rightmost = {box[2], box[3]};
|
||||
|
||||
if (leftmost[0][1] > leftmost[1][1]) {
|
||||
std::swap(leftmost[0], leftmost[1]);
|
||||
}
|
||||
|
||||
if (rightmost[0][1] > rightmost[1][1]) {
|
||||
std::swap(rightmost[0], rightmost[1]);
|
||||
}
|
||||
|
||||
std::vector<std::vector<int>> rect = {leftmost[0], rightmost[0], rightmost[1],
|
||||
leftmost[1]};
|
||||
return rect;
|
||||
}
|
||||
|
||||
// Converts a rotated rectangle into its four corner points, ordered as
// upper-left, upper-right, lower-right, lower-left ("upper" meaning the
// smaller y).
//   box  - rectangle whose corners are extracted with cv::boxPoints
//   ssid - output: the shorter side of the rectangle
// Returns the array allocated by Mat2Vec (4 rows of [x, y]); this function
// does not free it, so the caller owns it.
static float **get_mini_boxes(cv::RotatedRect box, float &ssid) {
  ssid = box.size.height <= box.size.width ? box.size.height : box.size.width;

  cv::Mat corners;
  cv::boxPoints(box, corners);

  // Sort the corners by x: rows 0-1 become the left pair, rows 2-3 the right.
  float **pts = Mat2Vec(corners);
  quickSort(pts, 0, 3);

  float *left_a = pts[0];
  float *left_b = pts[1];
  float *right_a = pts[2];
  float *right_b = pts[3];

  // Within each pair, the row with the smaller (or equal) y goes on top.
  const bool left_b_on_top = left_b[1] <= left_a[1];
  const bool right_b_on_top = right_b[1] <= right_a[1];

  pts[0] = left_b_on_top ? left_b : left_a;
  pts[1] = right_b_on_top ? right_b : right_a;
  pts[2] = right_b_on_top ? right_a : right_b;
  pts[3] = left_b_on_top ? left_a : left_b;

  return pts;
}
|
||||
|
||||
// Limits x to the range [min, max]. The upper bound is checked first, so max
// is returned when x exceeds it even if min > max.
template <class T> T clamp(T x, T min, T max) {
  return x > max ? max : (x < min ? min : x);
}
|
||||
|
||||
// Float counterpart of clamp(): limits x to [min, max], checking the upper
// bound first.
static float clampf(float x, float min, float max) {
  if (x > max) {
    return max;
  }
  return x < min ? min : x;
}
|
||||
|
||||
float box_score_fast(float **box_array, cv::Mat pred) {
|
||||
auto array = box_array;
|
||||
int width = pred.cols;
|
||||
int height = pred.rows;
|
||||
|
||||
float box_x[4] = {array[0][0], array[1][0], array[2][0], array[3][0]};
|
||||
float box_y[4] = {array[0][1], array[1][1], array[2][1], array[3][1]};
|
||||
|
||||
int xmin = clamp(int(std::floorf(*(std::min_element(box_x, box_x + 4)))), 0,
|
||||
width - 1);
|
||||
int xmax = clamp(int(std::ceilf(*(std::max_element(box_x, box_x + 4)))), 0,
|
||||
width - 1);
|
||||
int ymin = clamp(int(std::floorf(*(std::min_element(box_y, box_y + 4)))), 0,
|
||||
height - 1);
|
||||
int ymax = clamp(int(std::ceilf(*(std::max_element(box_y, box_y + 4)))), 0,
|
||||
height - 1);
|
||||
|
||||
cv::Mat mask;
|
||||
mask = cv::Mat::zeros(ymax - ymin + 1, xmax - xmin + 1, CV_8UC1);
|
||||
|
||||
cv::Point root_point[4];
|
||||
root_point[0] = cv::Point(int(array[0][0]) - xmin, int(array[0][1]) - ymin);
|
||||
root_point[1] = cv::Point(int(array[1][0]) - xmin, int(array[1][1]) - ymin);
|
||||
root_point[2] = cv::Point(int(array[2][0]) - xmin, int(array[2][1]) - ymin);
|
||||
root_point[3] = cv::Point(int(array[3][0]) - xmin, int(array[3][1]) - ymin);
|
||||
const cv::Point *ppt[1] = {root_point};
|
||||
int npt[] = {4};
|
||||
cv::fillPoly(mask, ppt, npt, 1, cv::Scalar(1));
|
||||
|
||||
cv::Mat croppedImg;
|
||||
pred(cv::Rect(xmin, ymin, xmax - xmin + 1, ymax - ymin + 1))
|
||||
.copyTo(croppedImg);
|
||||
|
||||
auto score = cv::mean(croppedImg, mask)[0];
|
||||
return score;
|
||||
}
|
||||
|
||||
std::vector<std::vector<std::vector<int>>>
|
||||
boxes_from_bitmap(const cv::Mat &pred, const cv::Mat &bitmap) {
|
||||
const int min_size = 3;
|
||||
const int max_candidates = 1000;
|
||||
const float box_thresh = 0.5;
|
||||
|
||||
int width = bitmap.cols;
|
||||
int height = bitmap.rows;
|
||||
|
||||
std::vector<std::vector<cv::Point>> contours;
|
||||
std::vector<cv::Vec4i> hierarchy;
|
||||
|
||||
cv::findContours(bitmap, contours, hierarchy, cv::RETR_LIST,
|
||||
cv::CHAIN_APPROX_SIMPLE);
|
||||
|
||||
int num_contours =
|
||||
contours.size() >= max_candidates ? max_candidates : contours.size();
|
||||
|
||||
std::vector<std::vector<std::vector<int>>> boxes;
|
||||
|
||||
for (int _i = 0; _i < num_contours; _i++) {
|
||||
float ssid;
|
||||
cv::RotatedRect box = cv::minAreaRect(contours[_i]);
|
||||
auto array = get_mini_boxes(box, ssid);
|
||||
|
||||
auto box_for_unclip = array;
|
||||
// end get_mini_box
|
||||
|
||||
if (ssid < min_size) {
|
||||
continue;
|
||||
}
|
||||
|
||||
float score;
|
||||
score = box_score_fast(array, pred);
|
||||
// end box_score_fast
|
||||
if (score < box_thresh) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// start for unclip
|
||||
cv::RotatedRect points = unclip(box_for_unclip);
|
||||
// end for unclip
|
||||
|
||||
cv::RotatedRect clipbox = points;
|
||||
auto cliparray = get_mini_boxes(clipbox, ssid);
|
||||
|
||||
if (ssid < min_size + 2)
|
||||
continue;
|
||||
|
||||
int dest_width = pred.cols;
|
||||
int dest_height = pred.rows;
|
||||
std::vector<std::vector<int>> intcliparray;
|
||||
|
||||
for (int num_pt = 0; num_pt < 4; num_pt++) {
|
||||
std::vector<int> a{int(clampf(roundf(cliparray[num_pt][0] / float(width) *
|
||||
float(dest_width)),
|
||||
0, float(dest_width))),
|
||||
int(clampf(roundf(cliparray[num_pt][1] /
|
||||
float(height) * float(dest_height)),
|
||||
0, float(dest_height)))};
|
||||
intcliparray.emplace_back(std::move(a));
|
||||
}
|
||||
boxes.emplace_back(std::move(intcliparray));
|
||||
|
||||
} // end for
|
||||
return boxes;
|
||||
}
|
||||
|
||||
// Returns the larger of a and b.
int _max(int a, int b) {
  if (a < b) {
    return b;
  }
  return a;
}
|
||||
|
||||
// Returns the smaller of a and b.
int _min(int a, int b) {
  if (a < b) {
    return a;
  }
  return b;
}
|
||||
|
||||
std::vector<std::vector<std::vector<int>>>
|
||||
filter_tag_det_res(const std::vector<std::vector<std::vector<int>>> &o_boxes,
|
||||
float ratio_h, float ratio_w, const cv::Mat &srcimg) {
|
||||
int oriimg_h = srcimg.rows;
|
||||
int oriimg_w = srcimg.cols;
|
||||
std::vector<std::vector<std::vector<int>>> boxes{o_boxes};
|
||||
std::vector<std::vector<std::vector<int>>> root_points;
|
||||
for (int n = 0; n < boxes.size(); n++) {
|
||||
boxes[n] = order_points_clockwise(boxes[n]);
|
||||
for (int m = 0; m < boxes[0].size(); m++) {
|
||||
boxes[n][m][0] /= ratio_w;
|
||||
boxes[n][m][1] /= ratio_h;
|
||||
|
||||
boxes[n][m][0] = int(_min(_max(boxes[n][m][0], 0), oriimg_w - 1));
|
||||
boxes[n][m][1] = int(_min(_max(boxes[n][m][1], 0), oriimg_h - 1));
|
||||
}
|
||||
}
|
||||
|
||||
for (int n = 0; n < boxes.size(); n++) {
|
||||
int rect_width, rect_height;
|
||||
rect_width = int(sqrt(pow(boxes[n][0][0] - boxes[n][1][0], 2) +
|
||||
pow(boxes[n][0][1] - boxes[n][1][1], 2)));
|
||||
rect_height = int(sqrt(pow(boxes[n][0][0] - boxes[n][3][0], 2) +
|
||||
pow(boxes[n][0][1] - boxes[n][3][1], 2)));
|
||||
if (rect_width <= 10 || rect_height <= 10)
|
||||
continue;
|
||||
root_points.push_back(boxes[n]);
|
||||
}
|
||||
return root_points;
|
||||
}
|
|
@ -0,0 +1,13 @@
|
|||
//
|
||||
// Created by fujiayi on 2020/7/2.
|
||||
//
|
||||
#pragma once
|
||||
#include <opencv2/opencv.hpp>
|
||||
#include <vector>
|
||||
|
||||
std::vector<std::vector<std::vector<int>>>
|
||||
boxes_from_bitmap(const cv::Mat &pred, const cv::Mat &bitmap);
|
||||
|
||||
std::vector<std::vector<std::vector<int>>>
|
||||
filter_tag_det_res(const std::vector<std::vector<std::vector<int>>> &o_boxes,
|
||||
float ratio_h, float ratio_w, const cv::Mat &srcimg);
|
|
@ -0,0 +1,261 @@
|
|||
//
|
||||
// Created by fujiayi on 2020/7/1.
|
||||
//
|
||||
|
||||
#include "ocr_ppredictor.h"
|
||||
#include "common.h"
|
||||
#include "ocr_cls_process.h"
|
||||
#include "ocr_crnn_process.h"
|
||||
#include "ocr_db_post_process.h"
|
||||
#include "preprocess.h"
|
||||
|
||||
namespace ppredictor {
|
||||
|
||||
// Stores a copy of the configuration; the predictors themselves are created
// later by init() or init_from_file().
OCR_PPredictor::OCR_PPredictor(const OCR_Config &config) : _config(config) {}
|
||||
|
||||
int OCR_PPredictor::init(const std::string &det_model_content,
|
||||
const std::string &rec_model_content,
|
||||
const std::string &cls_model_content) {
|
||||
_det_predictor = std::unique_ptr<PPredictor>(
|
||||
new PPredictor{_config.thread_num, NET_OCR, _config.mode});
|
||||
_det_predictor->init_nb(det_model_content);
|
||||
|
||||
_rec_predictor = std::unique_ptr<PPredictor>(
|
||||
new PPredictor{_config.thread_num, NET_OCR_INTERNAL, _config.mode});
|
||||
_rec_predictor->init_nb(rec_model_content);
|
||||
|
||||
_cls_predictor = std::unique_ptr<PPredictor>(
|
||||
new PPredictor{_config.thread_num, NET_OCR_INTERNAL, _config.mode});
|
||||
_cls_predictor->init_nb(cls_model_content);
|
||||
return RETURN_OK;
|
||||
}
|
||||
|
||||
int OCR_PPredictor::init_from_file(const std::string &det_model_path,
|
||||
const std::string &rec_model_path,
|
||||
const std::string &cls_model_path) {
|
||||
_det_predictor = std::unique_ptr<PPredictor>(
|
||||
new PPredictor{_config.thread_num, NET_OCR, _config.mode});
|
||||
_det_predictor->init_from_file(det_model_path);
|
||||
|
||||
_rec_predictor = std::unique_ptr<PPredictor>(
|
||||
new PPredictor{_config.thread_num, NET_OCR_INTERNAL, _config.mode});
|
||||
_rec_predictor->init_from_file(rec_model_path);
|
||||
|
||||
_cls_predictor = std::unique_ptr<PPredictor>(
|
||||
new PPredictor{_config.thread_num, NET_OCR_INTERNAL, _config.mode});
|
||||
_cls_predictor->init_from_file(cls_model_path);
|
||||
return RETURN_OK;
|
||||
}
|
||||
/**
|
||||
* for debug use, show result of First Step
|
||||
* @param filter_boxes
|
||||
* @param boxes
|
||||
* @param srcimg
|
||||
*/
|
||||
static void
|
||||
visual_img(const std::vector<std::vector<std::vector<int>>> &filter_boxes,
|
||||
const std::vector<std::vector<std::vector<int>>> &boxes,
|
||||
const cv::Mat &srcimg) {
|
||||
// visualization
|
||||
cv::Point rook_points[filter_boxes.size()][4];
|
||||
for (int n = 0; n < filter_boxes.size(); n++) {
|
||||
for (int m = 0; m < filter_boxes[0].size(); m++) {
|
||||
rook_points[n][m] =
|
||||
cv::Point(int(filter_boxes[n][m][0]), int(filter_boxes[n][m][1]));
|
||||
}
|
||||
}
|
||||
|
||||
cv::Mat img_vis;
|
||||
srcimg.copyTo(img_vis);
|
||||
for (int n = 0; n < boxes.size(); n++) {
|
||||
const cv::Point *ppt[1] = {rook_points[n]};
|
||||
int npt[] = {4};
|
||||
cv::polylines(img_vis, ppt, npt, 1, 1, CV_RGB(0, 255, 0), 2, 8, 0);
|
||||
}
|
||||
// 调试用,自行替换需要修改的路径
|
||||
cv::imwrite("/sdcard/1/vis.png", img_vis);
|
||||
}
|
||||
|
||||
std::vector<OCRPredictResult>
|
||||
OCR_PPredictor::infer_ocr(const std::vector<int64_t> &dims,
|
||||
const float *input_data, int input_len, int net_flag,
|
||||
cv::Mat &origin) {
|
||||
PredictorInput input = _det_predictor->get_first_input();
|
||||
input.set_dims(dims);
|
||||
input.set_data(input_data, input_len);
|
||||
std::vector<PredictorOutput> results = _det_predictor->infer();
|
||||
PredictorOutput &res = results.at(0);
|
||||
std::vector<std::vector<std::vector<int>>> filtered_box = calc_filtered_boxes(
|
||||
res.get_float_data(), res.get_size(), (int)dims[2], (int)dims[3], origin);
|
||||
LOGI("Filter_box size %ld", filtered_box.size());
|
||||
return infer_rec(filtered_box, origin);
|
||||
}
|
||||
|
||||
// Runs recognition on every detected box and returns the decoded results.
//   boxes      - detected boxes; they are processed in reverse order
//   origin_img - image the boxes are cropped from
// For each box: crop it, pass the crop through infer_cls, resize it, normalise
// it into the recognition input, run the model and decode the output greedily.
// Boxes that decode to no indices are left out of the result.
std::vector<OCRPredictResult> OCR_PPredictor::infer_rec(
    const std::vector<std::vector<std::vector<int>>> &boxes,
    const cv::Mat &origin_img) {
  std::vector<float> mean = {0.5f, 0.5f, 0.5f};
  std::vector<float> scale = {1 / 0.5f, 1 / 0.5f, 1 / 0.5f};
  // Height and width (dims[2], dims[3]) are filled in per crop below.
  std::vector<int64_t> dims = {1, 3, 0, 0};
  std::vector<OCRPredictResult> ocr_results;

  PredictorInput input = _rec_predictor->get_first_input();
  for (auto bp = boxes.crbegin(); bp != boxes.crend(); ++bp) {
    const std::vector<std::vector<int>> &box = *bp;
    cv::Mat crop_img = get_rotate_crop_image(origin_img, box);
    crop_img = infer_cls(crop_img);

    float wh_ratio = float(crop_img.cols) / float(crop_img.rows);
    cv::Mat input_image = crnn_resize_img(crop_img, wh_ratio);
    // Convert to float and scale by 1/255.
    input_image.convertTo(input_image, CV_32FC3, 1 / 255.0f);
    const float *dimg = reinterpret_cast<const float *>(input_image.data);
    // Pixel count (rows * cols), not multiplied by the channel count.
    int input_size = input_image.rows * input_image.cols;

    dims[2] = input_image.rows;
    dims[3] = input_image.cols;
    input.set_dims(dims);

    // Writes the normalised image straight into the input tensor's buffer.
    neon_mean_scale(dimg, input.get_mutable_float_data(), input_size, mean,
                    scale);

    std::vector<PredictorOutput> results = _rec_predictor->infer();
    const float *predict_batch = results.at(0).get_float_data();
    const std::vector<int64_t> predict_shape = results.at(0).get_shape();

    OCRPredictResult res;

    // Greedy decode: take the argmax of each step, skip index 0 and skip an
    // index equal to the previous step's index.
    // NOTE(review): presumably index 0 is the CTC blank and predict_shape is
    // (batch, steps, classes) - confirm against the model.
    int argmax_idx;
    int last_index = 0;
    float score = 0.f;
    int count = 0;
    float max_value = 0.0f;

    for (int n = 0; n < predict_shape[1]; n++) {
      argmax_idx = int(argmax(&predict_batch[n * predict_shape[2]],
                              &predict_batch[(n + 1) * predict_shape[2]]));
      max_value =
          float(*std::max_element(&predict_batch[n * predict_shape[2]],
                                  &predict_batch[(n + 1) * predict_shape[2]]));
      if (argmax_idx > 0 && (!(n > 0 && argmax_idx == last_index))) {
        score += max_value;
        count += 1;
        res.word_index.push_back(argmax_idx);
      }
      last_index = argmax_idx;
    }
    // When count is 0 this is 0/0, but word_index is then empty as well and
    // the result is skipped just below, so the value is never used.
    score /= count;
    if (res.word_index.empty()) {
      continue;
    }
    res.score = score;
    res.points = box;
    ocr_results.emplace_back(std::move(res));
  }
  LOGI("ocr_results finished %lu", ocr_results.size());
  return ocr_results;
}
|
||||
|
||||
// Runs the classification model on a crop and returns a copy of the crop,
// rotated by 180 degrees when the model says so.
//   img    - crop to classify; it is not modified
//   thresh - minimum score required before the crop is rotated
// The crop is rotated when the best-scoring output index is odd and its score
// exceeds thresh.
cv::Mat OCR_PPredictor::infer_cls(const cv::Mat &img, float thresh) {
  std::vector<float> mean = {0.5f, 0.5f, 0.5f};
  std::vector<float> scale = {1 / 0.5f, 1 / 0.5f, 1 / 0.5f};
  // Height and width (dims[2], dims[3]) are filled in after resizing.
  std::vector<int64_t> dims = {1, 3, 0, 0};

  PredictorInput input = _cls_predictor->get_first_input();

  cv::Mat input_image = cls_resize_img(img);
  // Convert to float and scale by 1/255.
  input_image.convertTo(input_image, CV_32FC3, 1 / 255.0f);
  const float *dimg = reinterpret_cast<const float *>(input_image.data);
  // Pixel count (rows * cols), not multiplied by the channel count.
  int input_size = input_image.rows * input_image.cols;

  dims[2] = input_image.rows;
  dims[3] = input_image.cols;
  input.set_dims(dims);

  // Writes the normalised image straight into the input tensor's buffer.
  neon_mean_scale(dimg, input.get_mutable_float_data(), input_size, mean,
                  scale);

  std::vector<PredictorOutput> results = _cls_predictor->infer();

  // Pick the output with the highest score (scores <= 0 never win, so label
  // stays 0 in that case).
  const float *scores = results.at(0).get_float_data();
  const int64_t num_scores = results.at(0).get_size();
  float score = 0;
  int label = 0;
  for (int64_t i = 0; i < num_scores; i++) {
    LOGI("output scores [%f]", scores[i]);
    if (scores[i] > score) {
      score = scores[i];
      label = int(i);
    }
  }

  cv::Mat srcimg;
  img.copyTo(srcimg);
  if (label % 2 == 1 && score > thresh) {
    // cv::ROTATE_180 is the named form of the rotate code 1 used previously.
    cv::rotate(srcimg, srcimg, cv::ROTATE_180);
  }
  return srcimg;
}
|
||||
|
||||
std::vector<std::vector<std::vector<int>>>
|
||||
OCR_PPredictor::calc_filtered_boxes(const float *pred, int pred_size,
|
||||
int output_height, int output_width,
|
||||
const cv::Mat &origin) {
|
||||
const double threshold = 0.3;
|
||||
const double maxvalue = 1;
|
||||
|
||||
cv::Mat pred_map = cv::Mat::zeros(output_height, output_width, CV_32F);
|
||||
memcpy(pred_map.data, pred, pred_size * sizeof(float));
|
||||
cv::Mat cbuf_map;
|
||||
pred_map.convertTo(cbuf_map, CV_8UC1);
|
||||
|
||||
cv::Mat bit_map;
|
||||
cv::threshold(cbuf_map, bit_map, threshold, maxvalue, cv::THRESH_BINARY);
|
||||
|
||||
std::vector<std::vector<std::vector<int>>> boxes =
|
||||
boxes_from_bitmap(pred_map, bit_map);
|
||||
float ratio_h = output_height * 1.0f / origin.rows;
|
||||
float ratio_w = output_width * 1.0f / origin.cols;
|
||||
std::vector<std::vector<std::vector<int>>> filter_boxes =
|
||||
filter_tag_det_res(boxes, ratio_h, ratio_w, origin);
|
||||
return filter_boxes;
|
||||
}
|
||||
|
||||
std::vector<int>
|
||||
OCR_PPredictor::postprocess_rec_word_index(const PredictorOutput &res) {
|
||||
const int *rec_idx = res.get_int_data();
|
||||
const std::vector<std::vector<uint64_t>> rec_idx_lod = res.get_lod();
|
||||
|
||||
std::vector<int> pred_idx;
|
||||
for (int n = int(rec_idx_lod[0][0]); n < int(rec_idx_lod[0][1] * 2); n += 2) {
|
||||
pred_idx.emplace_back(rec_idx[n]);
|
||||
}
|
||||
return pred_idx;
|
||||
}
|
||||
|
||||
// Averages the maximum value of each row of the output over the rows whose
// argmax is not the last index, and returns that average (0 when no row
// qualifies).
// NOTE(review): presumably the last index (predict_shape[1] - 1) is the blank
// class - confirm against the model.
float OCR_PPredictor::postprocess_rec_score(const PredictorOutput &res) {
  const float *predict_batch = res.get_float_data();
  const std::vector<int64_t> predict_shape = res.get_shape();
  const std::vector<std::vector<uint64_t>> predict_lod = res.get_lod();
  // Row width; also used below to identify the excluded index.
  int blank = predict_shape[1];
  float score = 0.f;
  int count = 0;
  // The bound is unsigned: predict_lod[0][1] - 1 wraps around if that entry
  // is 0.
  for (int n = predict_lod[0][0]; n < predict_lod[0][1] - 1; n++) {
    int argmax_idx = argmax(predict_batch + n * predict_shape[1],
                            predict_batch + (n + 1) * predict_shape[1]);
    float max_value = predict_batch[n * predict_shape[1] + argmax_idx];
    // True whenever argmax_idx is below blank - 1 (both operands are ints).
    if (blank - 1 - argmax_idx > 1e-5) {
      score += max_value;
      count += 1;
    }
  }
  if (count == 0) {
    LOGE("calc score count 0");
  } else {
    score /= count;
  }
  LOGI("calc score: %f", score);
  return score;
}
|
||||
|
||||
// Always reports NET_OCR for this predictor.
NET_TYPE OCR_PPredictor::get_net_flag() const { return NET_OCR; }
|
||||
}
|
|
@ -0,0 +1,122 @@
|
|||
//
|
||||
// Created by fujiayi on 2020/7/1.
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "ppredictor.h"
|
||||
#include <opencv2/opencv.hpp>
|
||||
#include <paddle_api.h>
|
||||
#include <string>
|
||||
|
||||
namespace ppredictor {
|
||||
|
||||
/**
 * Settings passed to every predictor created by OCR_PPredictor.
 */
struct OCR_Config {
  int thread_num = 4; // Number of threads handed to Paddle-Lite
  paddle::lite_api::PowerMode mode =
      paddle::lite_api::LITE_POWER_HIGH; // Paddle-Lite power mode
};
|
||||
|
||||
/**
 * Result for one recognised box.
 */
struct OCRPredictResult {
  // Decoded output indices, in the order they were decoded.
  std::vector<int> word_index;
  // Corner points of the box, each stored as [x, y].
  std::vector<std::vector<int>> points;
  // Mean of the per-step maximum values that contributed to word_index.
  // Has no default value; it is assigned when a result is produced.
  float score;
};
|
||||
|
||||
/**
 * OCR pipeline built from three models:
 * 1. Detection (det): finds the polygons where text is located.
 * 2. Classification (cls): decides whether a cropped polygon must be rotated.
 * 3. Recognition (rec): runs on each crop taken from the original image.
 */
class OCR_PPredictor : public PPredictor_Interface {
public:
  OCR_PPredictor(const OCR_Config &config);

  virtual ~OCR_PPredictor() {}

  /**
   * Initializes the predictors of the three models from in-memory buffers.
   * @param det_model_content detection model buffer
   * @param rec_model_content recognition model buffer
   * @param cls_model_content classification model buffer
   * @return RETURN_OK
   */
  int init(const std::string &det_model_content,
           const std::string &rec_model_content,
           const std::string &cls_model_content);
  /**
   * Initializes the predictors of the three models from model files.
   * @param det_model_path detection model path
   * @param rec_model_path recognition model path
   * @param cls_model_path classification model path
   * @return RETURN_OK
   */
  int init_from_file(const std::string &det_model_path,
                     const std::string &rec_model_path,
                     const std::string &cls_model_path);
  /**
   * Runs the whole pipeline and returns the OCR results.
   * @param dims shape set on the detection input
   * @param input_data preprocessed float data for the detection input
   * @param input_len number of floats in input_data
   * @param net_flag not used by the implementation
   * @param origin original image
   * @return one result per recognised box
   */
  virtual std::vector<OCRPredictResult>
  infer_ocr(const std::vector<int64_t> &dims, const float *input_data,
            int input_len, int net_flag, cv::Mat &origin);

  virtual NET_TYPE get_net_flag() const;

private:
  /**
   * Computes the polygons from the output map of the detection model.
   * @param pred float output of the detection model
   * @param pred_size number of floats in pred
   * @param output_height height of the output map
   * @param output_width width of the output map
   * @param origin original image
   * @return filtered boxes, each a list of [x, y] points
   */
  std::vector<std::vector<std::vector<int>>>
  calc_filtered_boxes(const float *pred, int pred_size, int output_height,
                      int output_width, const cv::Mat &origin);

  /**
   * Runs the recognition model on every box.
   *
   * @param boxes detected boxes
   * @param origin original image the boxes are cropped from
   * @return one result per box that decoded to at least one index
   */
  std::vector<OCRPredictResult>
  infer_rec(const std::vector<std::vector<std::vector<int>>> &boxes,
            const cv::Mat &origin);

  /**
   * Runs the classification model on one crop.
   *
   * @param origin crop to classify
   * @param thresh minimum score required before the crop is rotated
   * @return copy of the crop, rotated when the model says so
   */
  cv::Mat infer_cls(const cv::Mat &origin, float thresh = 0.9);

  /**
   * Post-processes the recognition model output to extract the text indices.
   * @param res recognition output
   * @return extracted indices
   */
  std::vector<int> postprocess_rec_word_index(const PredictorOutput &res);

  /**
   * Calculates the confidence of the recognition model's text result.
   * @param res recognition output
   * @return confidence score
   */
  float postprocess_rec_score(const PredictorOutput &res);

  std::unique_ptr<PPredictor> _det_predictor;
  std::unique_ptr<PPredictor> _rec_predictor;
  std::unique_ptr<PPredictor> _cls_predictor;
  OCR_Config _config;
};
|
||||
}
|
|
@ -0,0 +1,65 @@
|
|||
#include "ppredictor.h"
|
||||
#include "common.h"
|
||||
|
||||
namespace ppredictor {
|
||||
// Stores the settings; the Paddle-Lite predictor itself is created later by
// init_nb() or init_from_file().
// The initializers are listed in member declaration order (_thread_num,
// _mode, _net_flag), which is the order they actually run in, so the list no
// longer triggers a -Wreorder warning.
PPredictor::PPredictor(int thread_num, int net_flag,
                       paddle::lite_api::PowerMode mode)
    : _thread_num(thread_num), _mode(mode), _net_flag(net_flag) {}
|
||||
|
||||
// Creates the predictor from a model held in memory.
//   model_content - model buffer handed to set_model_from_buffer()
// Returns the result of _init().
int PPredictor::init_nb(const std::string &model_content) {
  paddle::lite_api::MobileConfig config;
  config.set_model_from_buffer(model_content);
  return _init(config);
}
|
||||
|
||||
// Creates the predictor from a model file.
//   model_content - despite its name, this is handed to set_model_from_file(),
//                   i.e. it is used as a file path
// Returns the result of _init().
int PPredictor::init_from_file(const std::string &model_content) {
  paddle::lite_api::MobileConfig config;
  config.set_model_from_file(model_content);
  return _init(config);
}
|
||||
|
||||
// Applies the thread count and power mode to the config and creates the
// Paddle-Lite predictor from it.
// The created predictor is not checked; this function always returns
// RETURN_OK.
template <typename ConfigT> int PPredictor::_init(ConfigT &config) {
  config.set_threads(_thread_num);
  config.set_power_mode(_mode);
  _predictor = paddle::lite_api::CreatePaddlePredictor(config);
  LOGI("paddle instance created");
  return RETURN_OK;
}
|
||||
|
||||
// Wraps the predictor's input tensor at `index` in a PredictorInput.
// Also records that an input has been fetched; infer() returns no outputs
// until this has happened at least once.
PredictorInput PPredictor::get_input(int index) {
  PredictorInput input{_predictor->GetInput(index), index, _net_flag};
  _is_input_get = true;
  return input;
}
|
||||
|
||||
std::vector<PredictorInput> PPredictor::get_inputs(int num) {
|
||||
std::vector<PredictorInput> results;
|
||||
for (int i = 0; i < num; i++) {
|
||||
results.emplace_back(get_input(i));
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
// Convenience wrapper for get_input(0).
PredictorInput PPredictor::get_first_input() { return get_input(0); }
|
||||
|
||||
std::vector<PredictorOutput> PPredictor::infer() {
|
||||
LOGI("infer Run start %d", _net_flag);
|
||||
std::vector<PredictorOutput> results;
|
||||
if (!_is_input_get) {
|
||||
return results;
|
||||
}
|
||||
_predictor->Run();
|
||||
LOGI("infer Run end");
|
||||
|
||||
for (int i = 0; i < _predictor->GetOutputNames().size(); i++) {
|
||||
std::unique_ptr<const paddle::lite_api::Tensor> output_tensor =
|
||||
_predictor->GetOutput(i);
|
||||
LOGI("output tensor[%d] size %ld", i, product(output_tensor->shape()));
|
||||
PredictorOutput result{std::move(output_tensor), i, _net_flag};
|
||||
results.emplace_back(std::move(result));
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
// Returns the net flag given at construction, cast to NET_TYPE.
NET_TYPE PPredictor::get_net_flag() const { return (NET_TYPE)_net_flag; }
|
||||
}
|
|
@ -0,0 +1,63 @@
|
|||
#pragma once
|
||||
|
||||
#include "paddle_api.h"
|
||||
#include "predictor_input.h"
|
||||
#include "predictor_output.h"
|
||||
|
||||
namespace ppredictor {
|
||||
|
||||
/**
 * Paddle-Lite predictor common interface
 */
class PPredictor_Interface {
public:
  virtual ~PPredictor_Interface() {}

  // Returns the network type this predictor reports.
  virtual NET_TYPE get_net_flag() const = 0;
};
|
||||
|
||||
/**
 * Common predictor: a thin wrapper around one Paddle-Lite predictor.
 */
class PPredictor : public PPredictor_Interface {
public:
  PPredictor(
      int thread_num, int net_flag = 0,
      paddle::lite_api::PowerMode mode = paddle::lite_api::LITE_POWER_HIGH);

  virtual ~PPredictor() {}

  /**
   * Initializes the predictor from an in-memory model buffer.
   * @param model_content model buffer
   * @return RETURN_OK
   */
  virtual int init_nb(const std::string &model_content);

  /**
   * Initializes the predictor from a model file.
   * @param model_content path of the model file
   * @return RETURN_OK
   */
  virtual int init_from_file(const std::string &model_content);

  // Runs the model; returns no outputs until an input has been fetched.
  std::vector<PredictorOutput> infer();

  std::shared_ptr<paddle::lite_api::PaddlePredictor> get_predictor() {
    return _predictor;
  }

  // Returns wrappers for inputs 0 .. num - 1.
  virtual std::vector<PredictorInput> get_inputs(int num);

  // Returns a wrapper for the input tensor at `index`.
  virtual PredictorInput get_input(int index);

  // Returns a wrapper for input 0.
  virtual PredictorInput get_first_input();

  virtual NET_TYPE get_net_flag() const;

protected:
  // Applies thread count and power mode, then creates the predictor.
  template <typename ConfigT> int _init(ConfigT &config);

private:
  int _thread_num;
  paddle::lite_api::PowerMode _mode;
  std::shared_ptr<paddle::lite_api::PaddlePredictor> _predictor;
  // Set once get_input() has been called; infer() checks it before running.
  bool _is_input_get = false;
  int _net_flag;
};
|
||||
}
|
|
@ -0,0 +1,28 @@
|
|||
#include "predictor_input.h"
|
||||
|
||||
namespace ppredictor {
|
||||
|
||||
void PredictorInput::set_dims(std::vector<int64_t> dims) {
|
||||
// yolov3
|
||||
if (_net_flag == 101 && _index == 1) {
|
||||
_tensor->Resize({1, 2});
|
||||
_tensor->mutable_data<int>()[0] = (int)dims.at(2);
|
||||
_tensor->mutable_data<int>()[1] = (int)dims.at(3);
|
||||
} else {
|
||||
_tensor->Resize(dims);
|
||||
}
|
||||
_is_dims_set = true;
|
||||
}
|
||||
|
||||
// Returns the tensor's writable float buffer.
// Logs an error when set_dims() has not been called yet, but still returns
// the buffer in that case.
float *PredictorInput::get_mutable_float_data() {
  if (!_is_dims_set) {
    LOGE("PredictorInput::set_dims is not called");
  }
  return _tensor->mutable_data<float>();
}
|
||||
|
||||
// Copies input_float_len floats from input_data into the tensor's buffer.
// No size check is made against the tensor; the caller must have set dims
// large enough to hold the data.
void PredictorInput::set_data(const float *input_data, int input_float_len) {
  float *input_raw_data = get_mutable_float_data();
  memcpy(input_raw_data, input_data, input_float_len * sizeof(float));
}
|
||||
}
|
|
@ -0,0 +1,26 @@
|
|||
#pragma once
|
||||
|
||||
#include "common.h"
|
||||
#include <paddle_api.h>
|
||||
#include <vector>
|
||||
|
||||
namespace ppredictor {
|
||||
// Wrapper around one Paddle-Lite input tensor.
class PredictorInput {
public:
  // Takes ownership of the tensor; index and net_flag are kept for the
  // special case handled in set_dims().
  PredictorInput(std::unique_ptr<paddle::lite_api::Tensor> &&tensor, int index,
                 int net_flag)
      : _tensor(std::move(tensor)), _index(index), _net_flag(net_flag) {}

  // Sets the tensor shape; call this before writing any data.
  void set_dims(std::vector<int64_t> dims);

  // Returns the tensor's writable float buffer.
  float *get_mutable_float_data();

  // Copies input_float_len floats into the tensor.
  void set_data(const float *input_data, int input_float_len);

private:
  std::unique_ptr<paddle::lite_api::Tensor> _tensor;
  // Set by set_dims(); checked by get_mutable_float_data().
  bool _is_dims_set = false;
  int _index;
  int _net_flag;
};
|
||||
}
|
|
@ -0,0 +1,26 @@
|
|||
#include "predictor_output.h"
|
||||
namespace ppredictor {
|
||||
// Returns the tensor's data viewed as floats.
const float *PredictorOutput::get_float_data() const {
  return _tensor->data<float>();
}
|
||||
|
||||
// Returns the tensor's data viewed as ints.
const int *PredictorOutput::get_int_data() const {
  return _tensor->data<int>();
}
|
||||
|
||||
// Returns a copy of the tensor's lod.
const std::vector<std::vector<uint64_t>> PredictorOutput::get_lod() const {
  return _tensor->lod();
}
|
||||
|
||||
int64_t PredictorOutput::get_size() const {
|
||||
if (_net_flag == NET_OCR) {
|
||||
return _tensor->shape().at(2) * _tensor->shape().at(3);
|
||||
} else {
|
||||
return product(_tensor->shape());
|
||||
}
|
||||
}
|
||||
|
||||
// Returns a copy of the tensor's shape.
const std::vector<int64_t> PredictorOutput::get_shape() const {
  return _tensor->shape();
}
|
||||
}
|
|
@ -0,0 +1,31 @@
|
|||
#pragma once
|
||||
|
||||
#include "common.h"
|
||||
#include <paddle_api.h>
|
||||
#include <vector>
|
||||
|
||||
namespace ppredictor {
|
||||
// Wrapper around one Paddle-Lite output tensor.
class PredictorOutput {
public:
  // Leaves the tensor empty and _index / _net_flag uninitialized; the
  // accessors must not be called on a default-constructed object.
  PredictorOutput() {}
  // Takes ownership of the tensor.
  PredictorOutput(std::unique_ptr<const paddle::lite_api::Tensor> &&tensor,
                  int index, int net_flag)
      : _tensor(std::move(tensor)), _index(index), _net_flag(net_flag) {}

  const float *get_float_data() const;
  const int *get_int_data() const;
  // Element count; for NET_OCR only shape[2] * shape[3] is reported.
  int64_t get_size() const;
  const std::vector<std::vector<uint64_t>> get_lod() const;
  const std::vector<int64_t> get_shape() const;

  // NOTE(review): none of the accessors declared above fill these four public
  // members - confirm whether they are still used.
  std::vector<float> data; // float results; use data_int for int outputs
  std::vector<int> data_int; // int results, for layers that return int
  std::vector<int64_t> shape; // PaddleLite output shape
  std::vector<std::vector<uint64_t>> lod; // PaddleLite output lod

private:
  std::unique_ptr<const paddle::lite_api::Tensor> _tensor;
  int _index;
  int _net_flag;
};
|
||||
}
|
|
@ -0,0 +1,82 @@
|
|||
#include "preprocess.h"
|
||||
#include <android/bitmap.h>
|
||||
|
||||
// Converts an Android RGBA_8888 bitmap into a freshly allocated BGR cv::Mat.
// Returns an empty cv::Mat when the bitmap info cannot be read, the format is
// not RGBA_8888, or the pixels cannot be locked.
cv::Mat bitmap_to_cv_mat(JNIEnv *env, jobject bitmap) {
  AndroidBitmapInfo info;
  int result = AndroidBitmap_getInfo(env, bitmap, &info);
  if (result != ANDROID_BITMAP_RESULT_SUCCESS) {
    LOGE("AndroidBitmap_getInfo failed, result: %d", result);
    return cv::Mat{};
  }
  if (info.format != ANDROID_BITMAP_FORMAT_RGBA_8888) {
    LOGE("Bitmap format is not RGBA_8888 !");
    return cv::Mat{};
  }

  void *pixels = nullptr;
  result = AndroidBitmap_lockPixels(env, bitmap, &pixels);
  if (result != ANDROID_BITMAP_RESULT_SUCCESS || pixels == nullptr) {
    // Previously the return code was ignored and a null pointer could be
    // handed to memcpy.
    LOGE("AndroidBitmap_lockPixels failed, result: %d", result);
    return cv::Mat{};
  }

  // Wrap the locked pixels without copying. info.stride is the real distance
  // between rows in bytes, which is not guaranteed to equal width * 4.
  const cv::Mat rgba(static_cast<int>(info.height),
                     static_cast<int>(info.width), CV_8UC4, pixels,
                     static_cast<size_t>(info.stride));

  // cvtColor writes into its own buffer, so `bgr` stays valid after unlock.
  cv::Mat bgr;
  cv::cvtColor(rgba, bgr, cv::COLOR_RGBA2BGR);
  AndroidBitmap_unlockPixels(env, bitmap);
  return bgr;
}
|
||||
|
||||
// Returns `img` resized to `height` rows by `width` columns. When the image
// already has that size the input header is returned (no pixel copy).
cv::Mat resize_img(const cv::Mat &img, int height, int width) {
  if (img.rows == height && img.cols == width) {
    return img;
  }
  cv::Mat new_img;
  // cv::Size is (width, height). The arguments used to be swapped, which
  // produced a transposed target size for any non-square request and
  // contradicted the rows/cols check above.
  cv::resize(img, new_img, cv::Size(width, height));
  return new_img;
}
|
||||
|
||||
// fill tensor with mean and scale and trans layout: nhwc -> nchw, neon speed up
|
||||
void neon_mean_scale(const float *din, float *dout, int size,
|
||||
const std::vector<float> &mean,
|
||||
const std::vector<float> &scale) {
|
||||
if (mean.size() != 3 || scale.size() != 3) {
|
||||
LOGE("[ERROR] mean or scale size must equal to 3");
|
||||
return;
|
||||
}
|
||||
|
||||
float32x4_t vmean0 = vdupq_n_f32(mean[0]);
|
||||
float32x4_t vmean1 = vdupq_n_f32(mean[1]);
|
||||
float32x4_t vmean2 = vdupq_n_f32(mean[2]);
|
||||
float32x4_t vscale0 = vdupq_n_f32(scale[0]);
|
||||
float32x4_t vscale1 = vdupq_n_f32(scale[1]);
|
||||
float32x4_t vscale2 = vdupq_n_f32(scale[2]);
|
||||
|
||||
float *dout_c0 = dout;
|
||||
float *dout_c1 = dout + size;
|
||||
float *dout_c2 = dout + size * 2;
|
||||
|
||||
int i = 0;
|
||||
for (; i < size - 3; i += 4) {
|
||||
float32x4x3_t vin3 = vld3q_f32(din);
|
||||
float32x4_t vsub0 = vsubq_f32(vin3.val[0], vmean0);
|
||||
float32x4_t vsub1 = vsubq_f32(vin3.val[1], vmean1);
|
||||
float32x4_t vsub2 = vsubq_f32(vin3.val[2], vmean2);
|
||||
float32x4_t vs0 = vmulq_f32(vsub0, vscale0);
|
||||
float32x4_t vs1 = vmulq_f32(vsub1, vscale1);
|
||||
float32x4_t vs2 = vmulq_f32(vsub2, vscale2);
|
||||
vst1q_f32(dout_c0, vs0);
|
||||
vst1q_f32(dout_c1, vs1);
|
||||
vst1q_f32(dout_c2, vs2);
|
||||
|
||||
din += 12;
|
||||
dout_c0 += 4;
|
||||
dout_c1 += 4;
|
||||
dout_c2 += 4;
|
||||
}
|
||||
for (; i < size; i++) {
|
||||
*(dout_c0++) = (*(din++) - mean[0]) * scale[0];
|
||||
*(dout_c1++) = (*(din++) - mean[1]) * scale[1];
|
||||
*(dout_c2++) = (*(din++) - mean[2]) * scale[2];
|
||||
}
|
||||
}
|
|
@ -0,0 +1,12 @@
|
|||
#pragma once
|
||||
|
||||
#include "common.h"
|
||||
#include <jni.h>
|
||||
#include <opencv2/opencv.hpp>
|
||||
cv::Mat bitmap_to_cv_mat(JNIEnv *env, jobject bitmap);
|
||||
|
||||
cv::Mat resize_img(const cv::Mat &img, int height, int width);
|
||||
|
||||
void neon_mean_scale(const float *din, float *dout, int size,
|
||||
const std::vector<float> &mean,
|
||||
const std::vector<float> &scale);
|
|
@ -0,0 +1,128 @@
|
|||
/*
|
||||
* Copyright (C) 2014 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.baidu.paddle.lite.demo.ocr;
|
||||
|
||||
import android.content.res.Configuration;
|
||||
import android.os.Bundle;
|
||||
import android.preference.PreferenceActivity;
|
||||
import android.view.MenuInflater;
|
||||
import android.view.View;
|
||||
import android.view.ViewGroup;
|
||||
|
||||
import androidx.annotation.LayoutRes;
|
||||
import androidx.annotation.Nullable;
|
||||
import androidx.appcompat.app.ActionBar;
|
||||
import androidx.appcompat.app.AppCompatDelegate;
|
||||
import androidx.appcompat.widget.Toolbar;
|
||||
|
||||
/**
|
||||
* A {@link PreferenceActivity} which implements and proxies the necessary calls
|
||||
* to be used with AppCompat.
|
||||
* <p>
|
||||
* This technique can be used with an {@link android.app.Activity} class, not just
|
||||
* {@link PreferenceActivity}.
|
||||
*/
|
||||
public abstract class AppCompatPreferenceActivity extends PreferenceActivity {
|
||||
private AppCompatDelegate mDelegate;
|
||||
|
||||
@Override
|
||||
protected void onCreate(Bundle savedInstanceState) {
|
||||
getDelegate().installViewFactory();
|
||||
getDelegate().onCreate(savedInstanceState);
|
||||
super.onCreate(savedInstanceState);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void onPostCreate(Bundle savedInstanceState) {
|
||||
super.onPostCreate(savedInstanceState);
|
||||
getDelegate().onPostCreate(savedInstanceState);
|
||||
}
|
||||
|
||||
public ActionBar getSupportActionBar() {
|
||||
return getDelegate().getSupportActionBar();
|
||||
}
|
||||
|
||||
public void setSupportActionBar(@Nullable Toolbar toolbar) {
|
||||
getDelegate().setSupportActionBar(toolbar);
|
||||
}
|
||||
|
||||
@Override
|
||||
public MenuInflater getMenuInflater() {
|
||||
return getDelegate().getMenuInflater();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setContentView(@LayoutRes int layoutResID) {
|
||||
getDelegate().setContentView(layoutResID);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setContentView(View view) {
|
||||
getDelegate().setContentView(view);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setContentView(View view, ViewGroup.LayoutParams params) {
|
||||
getDelegate().setContentView(view, params);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void addContentView(View view, ViewGroup.LayoutParams params) {
|
||||
getDelegate().addContentView(view, params);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void onPostResume() {
|
||||
super.onPostResume();
|
||||
getDelegate().onPostResume();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void onTitleChanged(CharSequence title, int color) {
|
||||
super.onTitleChanged(title, color);
|
||||
getDelegate().setTitle(title);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onConfigurationChanged(Configuration newConfig) {
|
||||
super.onConfigurationChanged(newConfig);
|
||||
getDelegate().onConfigurationChanged(newConfig);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void onStop() {
|
||||
super.onStop();
|
||||
getDelegate().onStop();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void onDestroy() {
|
||||
super.onDestroy();
|
||||
getDelegate().onDestroy();
|
||||
}
|
||||
|
||||
public void invalidateOptionsMenu() {
|
||||
getDelegate().invalidateOptionsMenu();
|
||||
}
|
||||
|
||||
private AppCompatDelegate getDelegate() {
|
||||
if (mDelegate == null) {
|
||||
mDelegate = AppCompatDelegate.create(this, null);
|
||||
}
|
||||
return mDelegate;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,509 @@
|
|||
package com.baidu.paddle.lite.demo.ocr;
|
||||
|
||||
import android.Manifest;
|
||||
import android.app.ProgressDialog;
|
||||
import android.content.ContentResolver;
|
||||
import android.content.Context;
|
||||
import android.content.Intent;
|
||||
import android.content.SharedPreferences;
|
||||
import android.content.pm.PackageManager;
|
||||
import android.database.Cursor;
|
||||
import android.graphics.Bitmap;
|
||||
import android.graphics.BitmapFactory;
|
||||
import android.graphics.drawable.BitmapDrawable;
|
||||
import android.media.ExifInterface;
|
||||
import android.content.res.AssetManager;
|
||||
import android.net.Uri;
|
||||
import android.os.Bundle;
|
||||
import android.os.Environment;
|
||||
import android.os.Handler;
|
||||
import android.os.HandlerThread;
|
||||
import android.os.Message;
|
||||
import android.preference.PreferenceManager;
|
||||
import android.provider.MediaStore;
|
||||
import android.text.method.ScrollingMovementMethod;
|
||||
import android.util.Log;
|
||||
import android.view.Menu;
|
||||
import android.view.MenuInflater;
|
||||
import android.view.MenuItem;
|
||||
import android.view.View;
|
||||
import android.widget.ImageView;
|
||||
import android.widget.TextView;
|
||||
import android.widget.Toast;
|
||||
|
||||
import androidx.annotation.NonNull;
|
||||
import androidx.appcompat.app.AppCompatActivity;
|
||||
import androidx.core.app.ActivityCompat;
|
||||
import androidx.core.content.ContextCompat;
|
||||
import androidx.core.content.FileProvider;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.Date;
|
||||
|
||||
public class MainActivity extends AppCompatActivity {
|
||||
private static final String TAG = MainActivity.class.getSimpleName();
|
||||
public static final int OPEN_GALLERY_REQUEST_CODE = 0;
|
||||
public static final int TAKE_PHOTO_REQUEST_CODE = 1;
|
||||
|
||||
public static final int REQUEST_LOAD_MODEL = 0;
|
||||
public static final int REQUEST_RUN_MODEL = 1;
|
||||
public static final int RESPONSE_LOAD_MODEL_SUCCESSED = 0;
|
||||
public static final int RESPONSE_LOAD_MODEL_FAILED = 1;
|
||||
public static final int RESPONSE_RUN_MODEL_SUCCESSED = 2;
|
||||
public static final int RESPONSE_RUN_MODEL_FAILED = 3;
|
||||
|
||||
protected ProgressDialog pbLoadModel = null;
|
||||
protected ProgressDialog pbRunModel = null;
|
||||
|
||||
protected Handler receiver = null; // Receive messages from worker thread
|
||||
protected Handler sender = null; // Send command to worker thread
|
||||
protected HandlerThread worker = null; // Worker thread to load&run model
|
||||
|
||||
// UI components of the OCR demo
|
||||
protected TextView tvInputSetting;
|
||||
protected TextView tvStatus;
|
||||
protected ImageView ivInputImage;
|
||||
protected TextView tvOutputResult;
|
||||
protected TextView tvInferenceTime;
|
||||
|
||||
// Model settings of the OCR demo
|
||||
protected String modelPath = "";
|
||||
protected String labelPath = "";
|
||||
protected String imagePath = "";
|
||||
protected int cpuThreadNum = 1;
|
||||
protected String cpuPowerMode = "";
|
||||
protected String inputColorFormat = "";
|
||||
protected long[] inputShape = new long[]{};
|
||||
protected float[] inputMean = new float[]{};
|
||||
protected float[] inputStd = new float[]{};
|
||||
protected float scoreThreshold = 0.1f;
|
||||
private String currentPhotoPath;
|
||||
private AssetManager assetManager =null;
|
||||
|
||||
protected Predictor predictor = new Predictor();
|
||||
|
||||
@Override
|
||||
protected void onCreate(Bundle savedInstanceState) {
|
||||
super.onCreate(savedInstanceState);
|
||||
setContentView(R.layout.activity_main);
|
||||
|
||||
// Clear all setting items to avoid app crashing due to the incorrect settings
|
||||
SharedPreferences sharedPreferences = PreferenceManager.getDefaultSharedPreferences(this);
|
||||
SharedPreferences.Editor editor = sharedPreferences.edit();
|
||||
editor.clear();
|
||||
editor.apply();
|
||||
|
||||
// Setup the UI components
|
||||
tvInputSetting = findViewById(R.id.tv_input_setting);
|
||||
tvStatus = findViewById(R.id.tv_model_img_status);
|
||||
ivInputImage = findViewById(R.id.iv_input_image);
|
||||
tvInferenceTime = findViewById(R.id.tv_inference_time);
|
||||
tvOutputResult = findViewById(R.id.tv_output_result);
|
||||
tvInputSetting.setMovementMethod(ScrollingMovementMethod.getInstance());
|
||||
tvOutputResult.setMovementMethod(ScrollingMovementMethod.getInstance());
|
||||
|
||||
// Prepare the worker thread for mode loading and inference
|
||||
receiver = new Handler() {
|
||||
@Override
|
||||
public void handleMessage(Message msg) {
|
||||
switch (msg.what) {
|
||||
case RESPONSE_LOAD_MODEL_SUCCESSED:
|
||||
if(pbLoadModel!=null && pbLoadModel.isShowing()){
|
||||
pbLoadModel.dismiss();
|
||||
}
|
||||
onLoadModelSuccessed();
|
||||
break;
|
||||
case RESPONSE_LOAD_MODEL_FAILED:
|
||||
if(pbLoadModel!=null && pbLoadModel.isShowing()){
|
||||
pbLoadModel.dismiss();
|
||||
}
|
||||
Toast.makeText(MainActivity.this, "Load model failed!", Toast.LENGTH_SHORT).show();
|
||||
onLoadModelFailed();
|
||||
break;
|
||||
case RESPONSE_RUN_MODEL_SUCCESSED:
|
||||
if(pbRunModel!=null && pbRunModel.isShowing()){
|
||||
pbRunModel.dismiss();
|
||||
}
|
||||
onRunModelSuccessed();
|
||||
break;
|
||||
case RESPONSE_RUN_MODEL_FAILED:
|
||||
if(pbRunModel!=null && pbRunModel.isShowing()){
|
||||
pbRunModel.dismiss();
|
||||
}
|
||||
Toast.makeText(MainActivity.this, "Run model failed!", Toast.LENGTH_SHORT).show();
|
||||
onRunModelFailed();
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
worker = new HandlerThread("Predictor Worker");
|
||||
worker.start();
|
||||
sender = new Handler(worker.getLooper()) {
|
||||
public void handleMessage(Message msg) {
|
||||
switch (msg.what) {
|
||||
case REQUEST_LOAD_MODEL:
|
||||
// Load model and reload test image
|
||||
if (onLoadModel()) {
|
||||
receiver.sendEmptyMessage(RESPONSE_LOAD_MODEL_SUCCESSED);
|
||||
} else {
|
||||
receiver.sendEmptyMessage(RESPONSE_LOAD_MODEL_FAILED);
|
||||
}
|
||||
break;
|
||||
case REQUEST_RUN_MODEL:
|
||||
// Run model if model is loaded
|
||||
if (onRunModel()) {
|
||||
receiver.sendEmptyMessage(RESPONSE_RUN_MODEL_SUCCESSED);
|
||||
} else {
|
||||
receiver.sendEmptyMessage(RESPONSE_RUN_MODEL_FAILED);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void onResume() {
|
||||
super.onResume();
|
||||
SharedPreferences sharedPreferences = PreferenceManager.getDefaultSharedPreferences(this);
|
||||
boolean settingsChanged = false;
|
||||
String model_path = sharedPreferences.getString(getString(R.string.MODEL_PATH_KEY),
|
||||
getString(R.string.MODEL_PATH_DEFAULT));
|
||||
String label_path = sharedPreferences.getString(getString(R.string.LABEL_PATH_KEY),
|
||||
getString(R.string.LABEL_PATH_DEFAULT));
|
||||
String image_path = sharedPreferences.getString(getString(R.string.IMAGE_PATH_KEY),
|
||||
getString(R.string.IMAGE_PATH_DEFAULT));
|
||||
settingsChanged |= !model_path.equalsIgnoreCase(modelPath);
|
||||
settingsChanged |= !label_path.equalsIgnoreCase(labelPath);
|
||||
settingsChanged |= !image_path.equalsIgnoreCase(imagePath);
|
||||
int cpu_thread_num = Integer.parseInt(sharedPreferences.getString(getString(R.string.CPU_THREAD_NUM_KEY),
|
||||
getString(R.string.CPU_THREAD_NUM_DEFAULT)));
|
||||
settingsChanged |= cpu_thread_num != cpuThreadNum;
|
||||
String cpu_power_mode =
|
||||
sharedPreferences.getString(getString(R.string.CPU_POWER_MODE_KEY),
|
||||
getString(R.string.CPU_POWER_MODE_DEFAULT));
|
||||
settingsChanged |= !cpu_power_mode.equalsIgnoreCase(cpuPowerMode);
|
||||
String input_color_format =
|
||||
sharedPreferences.getString(getString(R.string.INPUT_COLOR_FORMAT_KEY),
|
||||
getString(R.string.INPUT_COLOR_FORMAT_DEFAULT));
|
||||
settingsChanged |= !input_color_format.equalsIgnoreCase(inputColorFormat);
|
||||
long[] input_shape =
|
||||
Utils.parseLongsFromString(sharedPreferences.getString(getString(R.string.INPUT_SHAPE_KEY),
|
||||
getString(R.string.INPUT_SHAPE_DEFAULT)), ",");
|
||||
float[] input_mean =
|
||||
Utils.parseFloatsFromString(sharedPreferences.getString(getString(R.string.INPUT_MEAN_KEY),
|
||||
getString(R.string.INPUT_MEAN_DEFAULT)), ",");
|
||||
float[] input_std =
|
||||
Utils.parseFloatsFromString(sharedPreferences.getString(getString(R.string.INPUT_STD_KEY)
|
||||
, getString(R.string.INPUT_STD_DEFAULT)), ",");
|
||||
settingsChanged |= input_shape.length != inputShape.length;
|
||||
settingsChanged |= input_mean.length != inputMean.length;
|
||||
settingsChanged |= input_std.length != inputStd.length;
|
||||
if (!settingsChanged) {
|
||||
for (int i = 0; i < input_shape.length; i++) {
|
||||
settingsChanged |= input_shape[i] != inputShape[i];
|
||||
}
|
||||
for (int i = 0; i < input_mean.length; i++) {
|
||||
settingsChanged |= input_mean[i] != inputMean[i];
|
||||
}
|
||||
for (int i = 0; i < input_std.length; i++) {
|
||||
settingsChanged |= input_std[i] != inputStd[i];
|
||||
}
|
||||
}
|
||||
float score_threshold =
|
||||
Float.parseFloat(sharedPreferences.getString(getString(R.string.SCORE_THRESHOLD_KEY),
|
||||
getString(R.string.SCORE_THRESHOLD_DEFAULT)));
|
||||
settingsChanged |= scoreThreshold != score_threshold;
|
||||
if (settingsChanged) {
|
||||
modelPath = model_path;
|
||||
labelPath = label_path;
|
||||
imagePath = image_path;
|
||||
cpuThreadNum = cpu_thread_num;
|
||||
cpuPowerMode = cpu_power_mode;
|
||||
inputColorFormat = input_color_format;
|
||||
inputShape = input_shape;
|
||||
inputMean = input_mean;
|
||||
inputStd = input_std;
|
||||
scoreThreshold = score_threshold;
|
||||
// Update UI
|
||||
tvInputSetting.setText("Model: " + modelPath.substring(modelPath.lastIndexOf("/") + 1) + "\n" + "CPU" +
|
||||
" Thread Num: " + Integer.toString(cpuThreadNum) + "\n" + "CPU Power Mode: " + cpuPowerMode);
|
||||
tvInputSetting.scrollTo(0, 0);
|
||||
// Reload model if configure has been changed
|
||||
// loadModel();
|
||||
set_img();
|
||||
}
|
||||
}
|
||||
|
||||
public void loadModel() {
|
||||
pbLoadModel = ProgressDialog.show(this, "", "loading model...", false, false);
|
||||
sender.sendEmptyMessage(REQUEST_LOAD_MODEL);
|
||||
}
|
||||
|
||||
public void runModel() {
|
||||
pbRunModel = ProgressDialog.show(this, "", "running model...", false, false);
|
||||
sender.sendEmptyMessage(REQUEST_RUN_MODEL);
|
||||
}
|
||||
|
||||
public boolean onLoadModel() {
|
||||
return predictor.init(MainActivity.this, modelPath, labelPath, cpuThreadNum,
|
||||
cpuPowerMode,
|
||||
inputColorFormat,
|
||||
inputShape, inputMean,
|
||||
inputStd, scoreThreshold);
|
||||
}
|
||||
|
||||
public boolean onRunModel() {
|
||||
return predictor.isLoaded() && predictor.runModel();
|
||||
}
|
||||
|
||||
public void onLoadModelSuccessed() {
|
||||
// Load test image from path and run model
|
||||
tvStatus.setText("STATUS: load model successed");
|
||||
}
|
||||
|
||||
public void onLoadModelFailed() {
|
||||
tvStatus.setText("STATUS: load model failed");
|
||||
}
|
||||
|
||||
public void onRunModelSuccessed() {
|
||||
tvStatus.setText("STATUS: run model successed");
|
||||
// Obtain results and update UI
|
||||
tvInferenceTime.setText("Inference time: " + predictor.inferenceTime() + " ms");
|
||||
Bitmap outputImage = predictor.outputImage();
|
||||
if (outputImage != null) {
|
||||
ivInputImage.setImageBitmap(outputImage);
|
||||
}
|
||||
tvOutputResult.setText(predictor.outputResult());
|
||||
tvOutputResult.scrollTo(0, 0);
|
||||
}
|
||||
|
||||
public void onRunModelFailed() {
|
||||
tvStatus.setText("STATUS: run model failed");
|
||||
}
|
||||
|
||||
public void onImageChanged(Bitmap image) {
|
||||
// Rerun model if users pick test image from gallery or camera
|
||||
if (image != null && predictor.isLoaded()) {
|
||||
predictor.setInputImage(image);
|
||||
runModel();
|
||||
}
|
||||
}
|
||||
|
||||
public void set_img() {
|
||||
// Load test image from path and run model
|
||||
try {
|
||||
assetManager= getAssets();
|
||||
InputStream in=assetManager.open(imagePath);
|
||||
Bitmap bmp=BitmapFactory.decodeStream(in);
|
||||
ivInputImage.setImageBitmap(bmp);
|
||||
} catch (IOException e) {
|
||||
Toast.makeText(MainActivity.this, "Load image failed!", Toast.LENGTH_SHORT).show();
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
public void onSettingsClicked() {
|
||||
startActivity(new Intent(MainActivity.this, SettingsActivity.class));
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean onCreateOptionsMenu(Menu menu) {
|
||||
MenuInflater inflater = getMenuInflater();
|
||||
inflater.inflate(R.menu.menu_action_options, menu);
|
||||
return true;
|
||||
}
|
||||
|
||||
public boolean onPrepareOptionsMenu(Menu menu) {
|
||||
boolean isLoaded = predictor.isLoaded();
|
||||
return super.onPrepareOptionsMenu(menu);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean onOptionsItemSelected(MenuItem item) {
|
||||
switch (item.getItemId()) {
|
||||
case android.R.id.home:
|
||||
finish();
|
||||
break;
|
||||
case R.id.settings:
|
||||
if (requestAllPermissions()) {
|
||||
// Make sure we have SDCard r&w permissions to load model from SDCard
|
||||
onSettingsClicked();
|
||||
}
|
||||
break;
|
||||
}
|
||||
return super.onOptionsItemSelected(item);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onRequestPermissionsResult(int requestCode, @NonNull String[] permissions,
|
||||
@NonNull int[] grantResults) {
|
||||
super.onRequestPermissionsResult(requestCode, permissions, grantResults);
|
||||
if (grantResults[0] != PackageManager.PERMISSION_GRANTED || grantResults[1] != PackageManager.PERMISSION_GRANTED) {
|
||||
Toast.makeText(this, "Permission Denied", Toast.LENGTH_SHORT).show();
|
||||
}
|
||||
}
|
||||
|
||||
private boolean requestAllPermissions() {
|
||||
if (ContextCompat.checkSelfPermission(this, Manifest.permission.WRITE_EXTERNAL_STORAGE)
|
||||
!= PackageManager.PERMISSION_GRANTED || ContextCompat.checkSelfPermission(this,
|
||||
Manifest.permission.CAMERA)
|
||||
!= PackageManager.PERMISSION_GRANTED) {
|
||||
ActivityCompat.requestPermissions(this, new String[]{Manifest.permission.WRITE_EXTERNAL_STORAGE,
|
||||
Manifest.permission.CAMERA},
|
||||
0);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private void openGallery() {
|
||||
Intent intent = new Intent(Intent.ACTION_PICK, null);
|
||||
intent.setDataAndType(MediaStore.Images.Media.EXTERNAL_CONTENT_URI, "image/*");
|
||||
startActivityForResult(intent, OPEN_GALLERY_REQUEST_CODE);
|
||||
}
|
||||
|
||||
private void takePhoto() {
|
||||
Intent takePictureIntent = new Intent(MediaStore.ACTION_IMAGE_CAPTURE);
|
||||
// Ensure that there's a camera activity to handle the intent
|
||||
if (takePictureIntent.resolveActivity(getPackageManager()) != null) {
|
||||
// Create the File where the photo should go
|
||||
File photoFile = null;
|
||||
try {
|
||||
photoFile = createImageFile();
|
||||
} catch (IOException ex) {
|
||||
Log.e("MainActitity", ex.getMessage(), ex);
|
||||
Toast.makeText(MainActivity.this,
|
||||
"Create Camera temp file failed: " + ex.getMessage(), Toast.LENGTH_SHORT).show();
|
||||
}
|
||||
// Continue only if the File was successfully created
|
||||
if (photoFile != null) {
|
||||
Log.i(TAG, "FILEPATH " + getExternalFilesDir("Pictures").getAbsolutePath());
|
||||
Uri photoURI = FileProvider.getUriForFile(this,
|
||||
"com.baidu.paddle.lite.demo.ocr.fileprovider",
|
||||
photoFile);
|
||||
currentPhotoPath = photoFile.getAbsolutePath();
|
||||
takePictureIntent.putExtra(MediaStore.EXTRA_OUTPUT, photoURI);
|
||||
startActivityForResult(takePictureIntent, TAKE_PHOTO_REQUEST_CODE);
|
||||
Log.i(TAG, "startActivityForResult finished");
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private File createImageFile() throws IOException {
|
||||
// Create an image file name
|
||||
String timeStamp = new SimpleDateFormat("yyyyMMdd_HHmmss").format(new Date());
|
||||
String imageFileName = "JPEG_" + timeStamp + "_";
|
||||
File storageDir = getExternalFilesDir(Environment.DIRECTORY_PICTURES);
|
||||
File image = File.createTempFile(
|
||||
imageFileName, /* prefix */
|
||||
".bmp", /* suffix */
|
||||
storageDir /* directory */
|
||||
);
|
||||
|
||||
return image;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void onActivityResult(int requestCode, int resultCode, Intent data) {
|
||||
super.onActivityResult(requestCode, resultCode, data);
|
||||
if (resultCode == RESULT_OK) {
|
||||
switch (requestCode) {
|
||||
case OPEN_GALLERY_REQUEST_CODE:
|
||||
if (data == null) {
|
||||
break;
|
||||
}
|
||||
try {
|
||||
ContentResolver resolver = getContentResolver();
|
||||
Uri uri = data.getData();
|
||||
Bitmap image = MediaStore.Images.Media.getBitmap(resolver, uri);
|
||||
String[] proj = {MediaStore.Images.Media.DATA};
|
||||
Cursor cursor = managedQuery(uri, proj, null, null, null);
|
||||
cursor.moveToFirst();
|
||||
if (image != null) {
|
||||
// onImageChanged(image);
|
||||
ivInputImage.setImageBitmap(image);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
Log.e(TAG, e.toString());
|
||||
}
|
||||
break;
|
||||
case TAKE_PHOTO_REQUEST_CODE:
|
||||
if (currentPhotoPath != null) {
|
||||
ExifInterface exif = null;
|
||||
try {
|
||||
exif = new ExifInterface(currentPhotoPath);
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
int orientation = exif.getAttributeInt(ExifInterface.TAG_ORIENTATION,
|
||||
ExifInterface.ORIENTATION_UNDEFINED);
|
||||
Log.i(TAG, "rotation " + orientation);
|
||||
Bitmap image = BitmapFactory.decodeFile(currentPhotoPath);
|
||||
image = Utils.rotateBitmap(image, orientation);
|
||||
if (image != null) {
|
||||
// onImageChanged(image);
|
||||
ivInputImage.setImageBitmap(image);
|
||||
}
|
||||
} else {
|
||||
Log.e(TAG, "currentPhotoPath is null");
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void btn_load_model_click(View view) {
|
||||
if (predictor.isLoaded()){
|
||||
tvStatus.setText("STATUS: model has been loaded");
|
||||
}else{
|
||||
tvStatus.setText("STATUS: load model ......");
|
||||
loadModel();
|
||||
}
|
||||
}
|
||||
|
||||
public void btn_run_model_click(View view) {
|
||||
Bitmap image =((BitmapDrawable)ivInputImage.getDrawable()).getBitmap();
|
||||
if(image == null) {
|
||||
tvStatus.setText("STATUS: image is not exists");
|
||||
}
|
||||
else if (!predictor.isLoaded()){
|
||||
tvStatus.setText("STATUS: model is not loaded");
|
||||
}else{
|
||||
tvStatus.setText("STATUS: run model ...... ");
|
||||
predictor.setInputImage(image);
|
||||
runModel();
|
||||
}
|
||||
}
|
||||
public void btn_choice_img_click(View view) {
|
||||
if (requestAllPermissions()) {
|
||||
openGallery();
|
||||
}
|
||||
}
|
||||
|
||||
public void btn_take_photo_click(View view) {
|
||||
if (requestAllPermissions()) {
|
||||
takePhoto();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void onDestroy() {
|
||||
if (predictor != null) {
|
||||
predictor.releaseModel();
|
||||
}
|
||||
worker.quit();
|
||||
super.onDestroy();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,157 @@
|
|||
package com.baidu.paddle.lite.demo.ocr;
|
||||
|
||||
import android.graphics.Bitmap;
|
||||
import android.graphics.BitmapFactory;
|
||||
import android.os.Build;
|
||||
import android.os.Bundle;
|
||||
import android.os.Handler;
|
||||
import android.os.HandlerThread;
|
||||
import android.os.Message;
|
||||
import android.util.Log;
|
||||
import android.view.View;
|
||||
import android.widget.Button;
|
||||
import android.widget.ImageView;
|
||||
import android.widget.TextView;
|
||||
import android.widget.Toast;
|
||||
|
||||
import androidx.appcompat.app.AppCompatActivity;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
public class MiniActivity extends AppCompatActivity {
|
||||
|
||||
|
||||
public static final int REQUEST_LOAD_MODEL = 0;
|
||||
public static final int REQUEST_RUN_MODEL = 1;
|
||||
public static final int REQUEST_UNLOAD_MODEL = 2;
|
||||
public static final int RESPONSE_LOAD_MODEL_SUCCESSED = 0;
|
||||
public static final int RESPONSE_LOAD_MODEL_FAILED = 1;
|
||||
public static final int RESPONSE_RUN_MODEL_SUCCESSED = 2;
|
||||
public static final int RESPONSE_RUN_MODEL_FAILED = 3;
|
||||
|
||||
private static final String TAG = "MiniActivity";
|
||||
|
||||
protected Handler receiver = null; // Receive messages from worker thread
|
||||
protected Handler sender = null; // Send command to worker thread
|
||||
protected HandlerThread worker = null; // Worker thread to load&run model
|
||||
protected volatile Predictor predictor = null;
|
||||
|
||||
private String assetModelDirPath = "models/ocr_v2_for_cpu";
|
||||
private String assetlabelFilePath = "labels/ppocr_keys_v1.txt";
|
||||
|
||||
private Button button;
|
||||
private ImageView imageView; // image result
|
||||
private TextView textView; // text result
|
||||
|
||||
@Override
|
||||
protected void onCreate(Bundle savedInstanceState) {
|
||||
super.onCreate(savedInstanceState);
|
||||
setContentView(R.layout.activity_mini);
|
||||
|
||||
Log.i(TAG, "SHOW in Logcat");
|
||||
|
||||
// Prepare the worker thread for mode loading and inference
|
||||
worker = new HandlerThread("Predictor Worker");
|
||||
worker.start();
|
||||
sender = new Handler(worker.getLooper()) {
|
||||
public void handleMessage(Message msg) {
|
||||
switch (msg.what) {
|
||||
case REQUEST_LOAD_MODEL:
|
||||
// Load model and reload test image
|
||||
if (!onLoadModel()) {
|
||||
runOnUiThread(new Runnable() {
|
||||
@Override
|
||||
public void run() {
|
||||
Toast.makeText(MiniActivity.this, "Load model failed!", Toast.LENGTH_SHORT).show();
|
||||
}
|
||||
});
|
||||
}
|
||||
break;
|
||||
case REQUEST_RUN_MODEL:
|
||||
// Run model if model is loaded
|
||||
final boolean isSuccessed = onRunModel();
|
||||
runOnUiThread(new Runnable() {
|
||||
@Override
|
||||
public void run() {
|
||||
if (isSuccessed){
|
||||
onRunModelSuccessed();
|
||||
}else{
|
||||
Toast.makeText(MiniActivity.this, "Run model failed!", Toast.LENGTH_SHORT).show();
|
||||
}
|
||||
}
|
||||
});
|
||||
break;
|
||||
}
|
||||
}
|
||||
};
|
||||
sender.sendEmptyMessage(REQUEST_LOAD_MODEL); // corresponding to REQUEST_LOAD_MODEL, to call onLoadModel()
|
||||
|
||||
imageView = findViewById(R.id.imageView);
|
||||
textView = findViewById(R.id.sample_text);
|
||||
button = findViewById(R.id.button);
|
||||
button.setOnClickListener(new View.OnClickListener() {
|
||||
@Override
|
||||
public void onClick(View v) {
|
||||
sender.sendEmptyMessage(REQUEST_RUN_MODEL);
|
||||
}
|
||||
});
|
||||
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void onDestroy() {
|
||||
onUnloadModel();
|
||||
if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.JELLY_BEAN_MR2) {
|
||||
worker.quitSafely();
|
||||
} else {
|
||||
worker.quit();
|
||||
}
|
||||
super.onDestroy();
|
||||
}
|
||||
|
||||
/**
|
||||
* call in onCreate, model init
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
private boolean onLoadModel() {
|
||||
if (predictor == null) {
|
||||
predictor = new Predictor();
|
||||
}
|
||||
return predictor.init(this, assetModelDirPath, assetlabelFilePath);
|
||||
}
|
||||
|
||||
/**
|
||||
* init engine
|
||||
* call in onCreate
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
private boolean onRunModel() {
|
||||
try {
|
||||
String assetImagePath = "images/0.jpg";
|
||||
InputStream imageStream = getAssets().open(assetImagePath);
|
||||
Bitmap image = BitmapFactory.decodeStream(imageStream);
|
||||
// Input is Bitmap
|
||||
predictor.setInputImage(image);
|
||||
return predictor.isLoaded() && predictor.runModel();
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
private void onRunModelSuccessed() {
|
||||
Log.i(TAG, "onRunModelSuccessed");
|
||||
textView.setText(predictor.outputResult);
|
||||
imageView.setImageBitmap(predictor.outputImage);
|
||||
}
|
||||
|
||||
private void onUnloadModel() {
|
||||
if (predictor != null) {
|
||||
predictor.releaseModel();
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,102 @@
|
|||
package com.baidu.paddle.lite.demo.ocr;
|
||||
|
||||
import android.graphics.Bitmap;
|
||||
import android.util.Log;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
|
||||
public class OCRPredictorNative {
|
||||
|
||||
private static final AtomicBoolean isSOLoaded = new AtomicBoolean();
|
||||
|
||||
public static void loadLibrary() throws RuntimeException {
|
||||
if (!isSOLoaded.get() && isSOLoaded.compareAndSet(false, true)) {
|
||||
try {
|
||||
System.loadLibrary("Native");
|
||||
} catch (Throwable e) {
|
||||
RuntimeException exception = new RuntimeException(
|
||||
"Load libNative.so failed, please check it exists in apk file.", e);
|
||||
throw exception;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private Config config;
|
||||
|
||||
private long nativePointer = 0;
|
||||
|
||||
public OCRPredictorNative(Config config) {
|
||||
this.config = config;
|
||||
loadLibrary();
|
||||
nativePointer = init(config.detModelFilename, config.recModelFilename,config.clsModelFilename,
|
||||
config.cpuThreadNum, config.cpuPower);
|
||||
Log.i("OCRPredictorNative", "load success " + nativePointer);
|
||||
|
||||
}
|
||||
|
||||
|
||||
public ArrayList<OcrResultModel> runImage(float[] inputData, int width, int height, int channels, Bitmap originalImage) {
|
||||
Log.i("OCRPredictorNative", "begin to run image " + inputData.length + " " + width + " " + height);
|
||||
float[] dims = new float[]{1, channels, height, width};
|
||||
float[] rawResults = forward(nativePointer, inputData, dims, originalImage);
|
||||
ArrayList<OcrResultModel> results = postprocess(rawResults);
|
||||
return results;
|
||||
}
|
||||
|
||||
public static class Config {
|
||||
public int cpuThreadNum;
|
||||
public String cpuPower;
|
||||
public String detModelFilename;
|
||||
public String recModelFilename;
|
||||
public String clsModelFilename;
|
||||
|
||||
}
|
||||
|
||||
public void destory(){
|
||||
if (nativePointer > 0) {
|
||||
release(nativePointer);
|
||||
nativePointer = 0;
|
||||
}
|
||||
}
|
||||
|
||||
protected native long init(String detModelPath, String recModelPath,String clsModelPath, int threadNum, String cpuMode);
|
||||
|
||||
protected native float[] forward(long pointer, float[] buf, float[] ddims, Bitmap originalImage);
|
||||
|
||||
protected native void release(long pointer);
|
||||
|
||||
private ArrayList<OcrResultModel> postprocess(float[] raw) {
|
||||
ArrayList<OcrResultModel> results = new ArrayList<OcrResultModel>();
|
||||
int begin = 0;
|
||||
|
||||
while (begin < raw.length) {
|
||||
int point_num = Math.round(raw[begin]);
|
||||
int word_num = Math.round(raw[begin + 1]);
|
||||
OcrResultModel model = parse(raw, begin + 2, point_num, word_num);
|
||||
begin += 2 + 1 + point_num * 2 + word_num;
|
||||
results.add(model);
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
private OcrResultModel parse(float[] raw, int begin, int pointNum, int wordNum) {
|
||||
int current = begin;
|
||||
OcrResultModel model = new OcrResultModel();
|
||||
model.setConfidence(raw[current]);
|
||||
current++;
|
||||
for (int i = 0; i < pointNum; i++) {
|
||||
model.addPoints(Math.round(raw[current + i * 2]), Math.round(raw[current + i * 2 + 1]));
|
||||
}
|
||||
current += (pointNum * 2);
|
||||
for (int i = 0; i < wordNum; i++) {
|
||||
int index = Math.round(raw[current + i]);
|
||||
model.addWordIndex(index);
|
||||
}
|
||||
Log.i("OCRPredictorNative", "word finished " + wordNum);
|
||||
return model;
|
||||
}
|
||||
|
||||
|
||||
}
|
|
@ -0,0 +1,52 @@
|
|||
package com.baidu.paddle.lite.demo.ocr;
|
||||
|
||||
import android.graphics.Point;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
public class OcrResultModel {
|
||||
private List<Point> points;
|
||||
private List<Integer> wordIndex;
|
||||
private String label;
|
||||
private float confidence;
|
||||
|
||||
public OcrResultModel() {
|
||||
super();
|
||||
points = new ArrayList<>();
|
||||
wordIndex = new ArrayList<>();
|
||||
}
|
||||
|
||||
public void addPoints(int x, int y) {
|
||||
Point point = new Point(x, y);
|
||||
points.add(point);
|
||||
}
|
||||
|
||||
public void addWordIndex(int index) {
|
||||
wordIndex.add(index);
|
||||
}
|
||||
|
||||
public List<Point> getPoints() {
|
||||
return points;
|
||||
}
|
||||
|
||||
public List<Integer> getWordIndex() {
|
||||
return wordIndex;
|
||||
}
|
||||
|
||||
public String getLabel() {
|
||||
return label;
|
||||
}
|
||||
|
||||
public void setLabel(String label) {
|
||||
this.label = label;
|
||||
}
|
||||
|
||||
public float getConfidence() {
|
||||
return confidence;
|
||||
}
|
||||
|
||||
public void setConfidence(float confidence) {
|
||||
this.confidence = confidence;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,356 @@
|
|||
package com.baidu.paddle.lite.demo.ocr;
|
||||
|
||||
import android.content.Context;
|
||||
import android.graphics.Bitmap;
|
||||
import android.graphics.Canvas;
|
||||
import android.graphics.Color;
|
||||
import android.graphics.Paint;
|
||||
import android.graphics.Path;
|
||||
import android.graphics.Point;
|
||||
import android.util.Log;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
import java.util.Vector;
|
||||
|
||||
import static android.graphics.Color.*;
|
||||
|
||||
public class Predictor {
|
||||
private static final String TAG = Predictor.class.getSimpleName();
|
||||
public boolean isLoaded = false;
|
||||
public int warmupIterNum = 1;
|
||||
public int inferIterNum = 1;
|
||||
public int cpuThreadNum = 4;
|
||||
public String cpuPowerMode = "LITE_POWER_HIGH";
|
||||
public String modelPath = "";
|
||||
public String modelName = "";
|
||||
protected OCRPredictorNative paddlePredictor = null;
|
||||
protected float inferenceTime = 0;
|
||||
// Only for object detection
|
||||
protected Vector<String> wordLabels = new Vector<String>();
|
||||
protected String inputColorFormat = "BGR";
|
||||
protected long[] inputShape = new long[]{1, 3, 960};
|
||||
protected float[] inputMean = new float[]{0.485f, 0.456f, 0.406f};
|
||||
protected float[] inputStd = new float[]{1.0f / 0.229f, 1.0f / 0.224f, 1.0f / 0.225f};
|
||||
protected float scoreThreshold = 0.1f;
|
||||
protected Bitmap inputImage = null;
|
||||
protected Bitmap outputImage = null;
|
||||
protected volatile String outputResult = "";
|
||||
protected float preprocessTime = 0;
|
||||
protected float postprocessTime = 0;
|
||||
|
||||
|
||||
public Predictor() {
|
||||
}
|
||||
|
||||
public boolean init(Context appCtx, String modelPath, String labelPath) {
|
||||
isLoaded = loadModel(appCtx, modelPath, cpuThreadNum, cpuPowerMode);
|
||||
if (!isLoaded) {
|
||||
return false;
|
||||
}
|
||||
isLoaded = loadLabel(appCtx, labelPath);
|
||||
return isLoaded;
|
||||
}
|
||||
|
||||
|
||||
public boolean init(Context appCtx, String modelPath, String labelPath, int cpuThreadNum, String cpuPowerMode,
|
||||
String inputColorFormat,
|
||||
long[] inputShape, float[] inputMean,
|
||||
float[] inputStd, float scoreThreshold) {
|
||||
if (inputShape.length != 3) {
|
||||
Log.e(TAG, "Size of input shape should be: 3");
|
||||
return false;
|
||||
}
|
||||
if (inputMean.length != inputShape[1]) {
|
||||
Log.e(TAG, "Size of input mean should be: " + Long.toString(inputShape[1]));
|
||||
return false;
|
||||
}
|
||||
if (inputStd.length != inputShape[1]) {
|
||||
Log.e(TAG, "Size of input std should be: " + Long.toString(inputShape[1]));
|
||||
return false;
|
||||
}
|
||||
if (inputShape[0] != 1) {
|
||||
Log.e(TAG, "Only one batch is supported in the image classification demo, you can use any batch size in " +
|
||||
"your Apps!");
|
||||
return false;
|
||||
}
|
||||
if (inputShape[1] != 1 && inputShape[1] != 3) {
|
||||
Log.e(TAG, "Only one/three channels are supported in the image classification demo, you can use any " +
|
||||
"channel size in your Apps!");
|
||||
return false;
|
||||
}
|
||||
if (!inputColorFormat.equalsIgnoreCase("BGR")) {
|
||||
Log.e(TAG, "Only BGR color format is supported.");
|
||||
return false;
|
||||
}
|
||||
boolean isLoaded = init(appCtx, modelPath, labelPath);
|
||||
if (!isLoaded) {
|
||||
return false;
|
||||
}
|
||||
this.inputColorFormat = inputColorFormat;
|
||||
this.inputShape = inputShape;
|
||||
this.inputMean = inputMean;
|
||||
this.inputStd = inputStd;
|
||||
this.scoreThreshold = scoreThreshold;
|
||||
return true;
|
||||
}
|
||||
|
||||
protected boolean loadModel(Context appCtx, String modelPath, int cpuThreadNum, String cpuPowerMode) {
|
||||
// Release model if exists
|
||||
releaseModel();
|
||||
|
||||
// Load model
|
||||
if (modelPath.isEmpty()) {
|
||||
return false;
|
||||
}
|
||||
String realPath = modelPath;
|
||||
if (!modelPath.substring(0, 1).equals("/")) {
|
||||
// Read model files from custom path if the first character of mode path is '/'
|
||||
// otherwise copy model to cache from assets
|
||||
realPath = appCtx.getCacheDir() + "/" + modelPath;
|
||||
Utils.copyDirectoryFromAssets(appCtx, modelPath, realPath);
|
||||
}
|
||||
if (realPath.isEmpty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
OCRPredictorNative.Config config = new OCRPredictorNative.Config();
|
||||
config.cpuThreadNum = cpuThreadNum;
|
||||
config.detModelFilename = realPath + File.separator + "ch_ppocr_mobile_v2.0_det_opt.nb";
|
||||
config.recModelFilename = realPath + File.separator + "ch_ppocr_mobile_v2.0_rec_opt.nb";
|
||||
config.clsModelFilename = realPath + File.separator + "ch_ppocr_mobile_v2.0_cls_opt.nb";
|
||||
Log.e("Predictor", "model path" + config.detModelFilename + " ; " + config.recModelFilename + ";" + config.clsModelFilename);
|
||||
config.cpuPower = cpuPowerMode;
|
||||
paddlePredictor = new OCRPredictorNative(config);
|
||||
|
||||
this.cpuThreadNum = cpuThreadNum;
|
||||
this.cpuPowerMode = cpuPowerMode;
|
||||
this.modelPath = realPath;
|
||||
this.modelName = realPath.substring(realPath.lastIndexOf("/") + 1);
|
||||
return true;
|
||||
}
|
||||
|
||||
public void releaseModel() {
|
||||
if (paddlePredictor != null) {
|
||||
paddlePredictor.destory();
|
||||
paddlePredictor = null;
|
||||
}
|
||||
isLoaded = false;
|
||||
cpuThreadNum = 1;
|
||||
cpuPowerMode = "LITE_POWER_HIGH";
|
||||
modelPath = "";
|
||||
modelName = "";
|
||||
}
|
||||
|
||||
protected boolean loadLabel(Context appCtx, String labelPath) {
|
||||
wordLabels.clear();
|
||||
wordLabels.add("black");
|
||||
// Load word labels from file
|
||||
try {
|
||||
InputStream assetsInputStream = appCtx.getAssets().open(labelPath);
|
||||
int available = assetsInputStream.available();
|
||||
byte[] lines = new byte[available];
|
||||
assetsInputStream.read(lines);
|
||||
assetsInputStream.close();
|
||||
String words = new String(lines);
|
||||
String[] contents = words.split("\n");
|
||||
for (String content : contents) {
|
||||
wordLabels.add(content);
|
||||
}
|
||||
Log.i(TAG, "Word label size: " + wordLabels.size());
|
||||
} catch (Exception e) {
|
||||
Log.e(TAG, e.getMessage());
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
public boolean runModel() {
|
||||
if (inputImage == null || !isLoaded()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Pre-process image, and feed input tensor with pre-processed data
|
||||
|
||||
Bitmap scaleImage = Utils.resizeWithStep(inputImage, Long.valueOf(inputShape[2]).intValue(), 32);
|
||||
|
||||
Date start = new Date();
|
||||
int channels = (int) inputShape[1];
|
||||
int width = scaleImage.getWidth();
|
||||
int height = scaleImage.getHeight();
|
||||
float[] inputData = new float[channels * width * height];
|
||||
if (channels == 3) {
|
||||
int[] channelIdx = null;
|
||||
if (inputColorFormat.equalsIgnoreCase("RGB")) {
|
||||
channelIdx = new int[]{0, 1, 2};
|
||||
} else if (inputColorFormat.equalsIgnoreCase("BGR")) {
|
||||
channelIdx = new int[]{2, 1, 0};
|
||||
} else {
|
||||
Log.i(TAG, "Unknown color format " + inputColorFormat + ", only RGB and BGR color format is " +
|
||||
"supported!");
|
||||
return false;
|
||||
}
|
||||
|
||||
int[] channelStride = new int[]{width * height, width * height * 2};
|
||||
int[] pixels=new int[width*height];
|
||||
scaleImage.getPixels(pixels,0,scaleImage.getWidth(),0,0,scaleImage.getWidth(),scaleImage.getHeight());
|
||||
for (int i = 0; i < pixels.length; i++) {
|
||||
int color = pixels[i];
|
||||
float[] rgb = new float[]{(float) red(color) / 255.0f, (float) green(color) / 255.0f,
|
||||
(float) blue(color) / 255.0f};
|
||||
inputData[i] = (rgb[channelIdx[0]] - inputMean[0]) / inputStd[0];
|
||||
inputData[i + channelStride[0]] = (rgb[channelIdx[1]] - inputMean[1]) / inputStd[1];
|
||||
inputData[i+ channelStride[1]] = (rgb[channelIdx[2]] - inputMean[2]) / inputStd[2];
|
||||
}
|
||||
} else if (channels == 1) {
|
||||
int[] pixels=new int[width*height];
|
||||
scaleImage.getPixels(pixels,0,scaleImage.getWidth(),0,0,scaleImage.getWidth(),scaleImage.getHeight());
|
||||
for (int i = 0; i < pixels.length; i++) {
|
||||
int color = pixels[i];
|
||||
float gray = (float) (red(color) + green(color) + blue(color)) / 3.0f / 255.0f;
|
||||
inputData[i] = (gray - inputMean[0]) / inputStd[0];
|
||||
}
|
||||
} else {
|
||||
Log.i(TAG, "Unsupported channel size " + Integer.toString(channels) + ", only channel 1 and 3 is " +
|
||||
"supported!");
|
||||
return false;
|
||||
}
|
||||
float[] pixels = inputData;
|
||||
Log.i(TAG, "pixels " + pixels[0] + " " + pixels[1] + " " + pixels[2] + " " + pixels[3]
|
||||
+ " " + pixels[pixels.length / 2] + " " + pixels[pixels.length / 2 + 1] + " " + pixels[pixels.length - 2] + " " + pixels[pixels.length - 1]);
|
||||
Date end = new Date();
|
||||
preprocessTime = (float) (end.getTime() - start.getTime());
|
||||
|
||||
// Warm up
|
||||
for (int i = 0; i < warmupIterNum; i++) {
|
||||
paddlePredictor.runImage(inputData, width, height, channels, inputImage);
|
||||
}
|
||||
warmupIterNum = 0; // do not need warm
|
||||
// Run inference
|
||||
start = new Date();
|
||||
ArrayList<OcrResultModel> results = paddlePredictor.runImage(inputData, width, height, channels, inputImage);
|
||||
end = new Date();
|
||||
inferenceTime = (end.getTime() - start.getTime()) / (float) inferIterNum;
|
||||
|
||||
results = postprocess(results);
|
||||
Log.i(TAG, "[stat] Preprocess Time: " + preprocessTime
|
||||
+ " ; Inference Time: " + inferenceTime + " ;Box Size " + results.size());
|
||||
drawResults(results);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
public boolean isLoaded() {
|
||||
return paddlePredictor != null && isLoaded;
|
||||
}
|
||||
|
||||
public String modelPath() {
|
||||
return modelPath;
|
||||
}
|
||||
|
||||
public String modelName() {
|
||||
return modelName;
|
||||
}
|
||||
|
||||
public int cpuThreadNum() {
|
||||
return cpuThreadNum;
|
||||
}
|
||||
|
||||
public String cpuPowerMode() {
|
||||
return cpuPowerMode;
|
||||
}
|
||||
|
||||
public float inferenceTime() {
|
||||
return inferenceTime;
|
||||
}
|
||||
|
||||
public Bitmap inputImage() {
|
||||
return inputImage;
|
||||
}
|
||||
|
||||
public Bitmap outputImage() {
|
||||
return outputImage;
|
||||
}
|
||||
|
||||
public String outputResult() {
|
||||
return outputResult;
|
||||
}
|
||||
|
||||
public float preprocessTime() {
|
||||
return preprocessTime;
|
||||
}
|
||||
|
||||
public float postprocessTime() {
|
||||
return postprocessTime;
|
||||
}
|
||||
|
||||
|
||||
public void setInputImage(Bitmap image) {
|
||||
if (image == null) {
|
||||
return;
|
||||
}
|
||||
this.inputImage = image.copy(Bitmap.Config.ARGB_8888, true);
|
||||
}
|
||||
|
||||
private ArrayList<OcrResultModel> postprocess(ArrayList<OcrResultModel> results) {
|
||||
for (OcrResultModel r : results) {
|
||||
StringBuffer word = new StringBuffer();
|
||||
for (int index : r.getWordIndex()) {
|
||||
if (index >= 0 && index < wordLabels.size()) {
|
||||
word.append(wordLabels.get(index));
|
||||
} else {
|
||||
Log.e(TAG, "Word index is not in label list:" + index);
|
||||
word.append("×");
|
||||
}
|
||||
}
|
||||
r.setLabel(word.toString());
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
private void drawResults(ArrayList<OcrResultModel> results) {
|
||||
StringBuffer outputResultSb = new StringBuffer("");
|
||||
for (int i = 0; i < results.size(); i++) {
|
||||
OcrResultModel result = results.get(i);
|
||||
StringBuilder sb = new StringBuilder("");
|
||||
sb.append(result.getLabel());
|
||||
sb.append(" ").append(result.getConfidence());
|
||||
sb.append("; Points: ");
|
||||
for (Point p : result.getPoints()) {
|
||||
sb.append("(").append(p.x).append(",").append(p.y).append(") ");
|
||||
}
|
||||
Log.i(TAG, sb.toString()); // show LOG in Logcat panel
|
||||
outputResultSb.append(i + 1).append(": ").append(result.getLabel()).append("\n");
|
||||
}
|
||||
outputResult = outputResultSb.toString();
|
||||
outputImage = inputImage;
|
||||
Canvas canvas = new Canvas(outputImage);
|
||||
Paint paintFillAlpha = new Paint();
|
||||
paintFillAlpha.setStyle(Paint.Style.FILL);
|
||||
paintFillAlpha.setColor(Color.parseColor("#3B85F5"));
|
||||
paintFillAlpha.setAlpha(50);
|
||||
|
||||
Paint paint = new Paint();
|
||||
paint.setColor(Color.parseColor("#3B85F5"));
|
||||
paint.setStrokeWidth(5);
|
||||
paint.setStyle(Paint.Style.STROKE);
|
||||
|
||||
for (OcrResultModel result : results) {
|
||||
Path path = new Path();
|
||||
List<Point> points = result.getPoints();
|
||||
path.moveTo(points.get(0).x, points.get(0).y);
|
||||
for (int i = points.size() - 1; i >= 0; i--) {
|
||||
Point p = points.get(i);
|
||||
path.lineTo(p.x, p.y);
|
||||
}
|
||||
canvas.drawPath(path, paint);
|
||||
canvas.drawPath(path, paintFillAlpha);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,201 @@
|
|||
package com.baidu.paddle.lite.demo.ocr;
|
||||
|
||||
import android.content.SharedPreferences;
|
||||
import android.os.Bundle;
|
||||
import android.preference.CheckBoxPreference;
|
||||
import android.preference.EditTextPreference;
|
||||
import android.preference.ListPreference;
|
||||
|
||||
import androidx.appcompat.app.ActionBar;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
public class SettingsActivity extends AppCompatPreferenceActivity implements SharedPreferences.OnSharedPreferenceChangeListener {
|
||||
ListPreference lpChoosePreInstalledModel = null;
|
||||
CheckBoxPreference cbEnableCustomSettings = null;
|
||||
EditTextPreference etModelPath = null;
|
||||
EditTextPreference etLabelPath = null;
|
||||
ListPreference etImagePath = null;
|
||||
ListPreference lpCPUThreadNum = null;
|
||||
ListPreference lpCPUPowerMode = null;
|
||||
ListPreference lpInputColorFormat = null;
|
||||
EditTextPreference etInputShape = null;
|
||||
EditTextPreference etInputMean = null;
|
||||
EditTextPreference etInputStd = null;
|
||||
EditTextPreference etScoreThreshold = null;
|
||||
|
||||
List<String> preInstalledModelPaths = null;
|
||||
List<String> preInstalledLabelPaths = null;
|
||||
List<String> preInstalledImagePaths = null;
|
||||
List<String> preInstalledInputShapes = null;
|
||||
List<String> preInstalledCPUThreadNums = null;
|
||||
List<String> preInstalledCPUPowerModes = null;
|
||||
List<String> preInstalledInputColorFormats = null;
|
||||
List<String> preInstalledInputMeans = null;
|
||||
List<String> preInstalledInputStds = null;
|
||||
List<String> preInstalledScoreThresholds = null;
|
||||
|
||||
@Override
|
||||
public void onCreate(Bundle savedInstanceState) {
|
||||
super.onCreate(savedInstanceState);
|
||||
addPreferencesFromResource(R.xml.settings);
|
||||
ActionBar supportActionBar = getSupportActionBar();
|
||||
if (supportActionBar != null) {
|
||||
supportActionBar.setDisplayHomeAsUpEnabled(true);
|
||||
}
|
||||
|
||||
// Initialized pre-installed models
|
||||
preInstalledModelPaths = new ArrayList<String>();
|
||||
preInstalledLabelPaths = new ArrayList<String>();
|
||||
preInstalledImagePaths = new ArrayList<String>();
|
||||
preInstalledInputShapes = new ArrayList<String>();
|
||||
preInstalledCPUThreadNums = new ArrayList<String>();
|
||||
preInstalledCPUPowerModes = new ArrayList<String>();
|
||||
preInstalledInputColorFormats = new ArrayList<String>();
|
||||
preInstalledInputMeans = new ArrayList<String>();
|
||||
preInstalledInputStds = new ArrayList<String>();
|
||||
preInstalledScoreThresholds = new ArrayList<String>();
|
||||
// Add ssd_mobilenet_v1_pascalvoc_for_cpu
|
||||
preInstalledModelPaths.add(getString(R.string.MODEL_PATH_DEFAULT));
|
||||
preInstalledLabelPaths.add(getString(R.string.LABEL_PATH_DEFAULT));
|
||||
preInstalledImagePaths.add(getString(R.string.IMAGE_PATH_DEFAULT));
|
||||
preInstalledCPUThreadNums.add(getString(R.string.CPU_THREAD_NUM_DEFAULT));
|
||||
preInstalledCPUPowerModes.add(getString(R.string.CPU_POWER_MODE_DEFAULT));
|
||||
preInstalledInputColorFormats.add(getString(R.string.INPUT_COLOR_FORMAT_DEFAULT));
|
||||
preInstalledInputShapes.add(getString(R.string.INPUT_SHAPE_DEFAULT));
|
||||
preInstalledInputMeans.add(getString(R.string.INPUT_MEAN_DEFAULT));
|
||||
preInstalledInputStds.add(getString(R.string.INPUT_STD_DEFAULT));
|
||||
preInstalledScoreThresholds.add(getString(R.string.SCORE_THRESHOLD_DEFAULT));
|
||||
|
||||
// Setup UI components
|
||||
lpChoosePreInstalledModel =
|
||||
(ListPreference) findPreference(getString(R.string.CHOOSE_PRE_INSTALLED_MODEL_KEY));
|
||||
String[] preInstalledModelNames = new String[preInstalledModelPaths.size()];
|
||||
for (int i = 0; i < preInstalledModelPaths.size(); i++) {
|
||||
preInstalledModelNames[i] =
|
||||
preInstalledModelPaths.get(i).substring(preInstalledModelPaths.get(i).lastIndexOf("/") + 1);
|
||||
}
|
||||
lpChoosePreInstalledModel.setEntries(preInstalledModelNames);
|
||||
lpChoosePreInstalledModel.setEntryValues(preInstalledModelPaths.toArray(new String[preInstalledModelPaths.size()]));
|
||||
cbEnableCustomSettings =
|
||||
(CheckBoxPreference) findPreference(getString(R.string.ENABLE_CUSTOM_SETTINGS_KEY));
|
||||
etModelPath = (EditTextPreference) findPreference(getString(R.string.MODEL_PATH_KEY));
|
||||
etModelPath.setTitle("Model Path (SDCard: " + Utils.getSDCardDirectory() + ")");
|
||||
etLabelPath = (EditTextPreference) findPreference(getString(R.string.LABEL_PATH_KEY));
|
||||
etImagePath = (ListPreference) findPreference(getString(R.string.IMAGE_PATH_KEY));
|
||||
lpCPUThreadNum =
|
||||
(ListPreference) findPreference(getString(R.string.CPU_THREAD_NUM_KEY));
|
||||
lpCPUPowerMode =
|
||||
(ListPreference) findPreference(getString(R.string.CPU_POWER_MODE_KEY));
|
||||
lpInputColorFormat =
|
||||
(ListPreference) findPreference(getString(R.string.INPUT_COLOR_FORMAT_KEY));
|
||||
etInputShape = (EditTextPreference) findPreference(getString(R.string.INPUT_SHAPE_KEY));
|
||||
etInputMean = (EditTextPreference) findPreference(getString(R.string.INPUT_MEAN_KEY));
|
||||
etInputStd = (EditTextPreference) findPreference(getString(R.string.INPUT_STD_KEY));
|
||||
etScoreThreshold = (EditTextPreference) findPreference(getString(R.string.SCORE_THRESHOLD_KEY));
|
||||
}
|
||||
|
||||
private void reloadPreferenceAndUpdateUI() {
|
||||
SharedPreferences sharedPreferences = getPreferenceScreen().getSharedPreferences();
|
||||
boolean enableCustomSettings =
|
||||
sharedPreferences.getBoolean(getString(R.string.ENABLE_CUSTOM_SETTINGS_KEY), false);
|
||||
String modelPath = sharedPreferences.getString(getString(R.string.CHOOSE_PRE_INSTALLED_MODEL_KEY),
|
||||
getString(R.string.MODEL_PATH_DEFAULT));
|
||||
int modelIdx = lpChoosePreInstalledModel.findIndexOfValue(modelPath);
|
||||
if (modelIdx >= 0 && modelIdx < preInstalledModelPaths.size()) {
|
||||
if (!enableCustomSettings) {
|
||||
SharedPreferences.Editor editor = sharedPreferences.edit();
|
||||
editor.putString(getString(R.string.MODEL_PATH_KEY), preInstalledModelPaths.get(modelIdx));
|
||||
editor.putString(getString(R.string.LABEL_PATH_KEY), preInstalledLabelPaths.get(modelIdx));
|
||||
editor.putString(getString(R.string.IMAGE_PATH_KEY), preInstalledImagePaths.get(modelIdx));
|
||||
editor.putString(getString(R.string.CPU_THREAD_NUM_KEY), preInstalledCPUThreadNums.get(modelIdx));
|
||||
editor.putString(getString(R.string.CPU_POWER_MODE_KEY), preInstalledCPUPowerModes.get(modelIdx));
|
||||
editor.putString(getString(R.string.INPUT_COLOR_FORMAT_KEY),
|
||||
preInstalledInputColorFormats.get(modelIdx));
|
||||
editor.putString(getString(R.string.INPUT_SHAPE_KEY), preInstalledInputShapes.get(modelIdx));
|
||||
editor.putString(getString(R.string.INPUT_MEAN_KEY), preInstalledInputMeans.get(modelIdx));
|
||||
editor.putString(getString(R.string.INPUT_STD_KEY), preInstalledInputStds.get(modelIdx));
|
||||
editor.putString(getString(R.string.SCORE_THRESHOLD_KEY),
|
||||
preInstalledScoreThresholds.get(modelIdx));
|
||||
editor.apply();
|
||||
}
|
||||
lpChoosePreInstalledModel.setSummary(modelPath);
|
||||
}
|
||||
cbEnableCustomSettings.setChecked(enableCustomSettings);
|
||||
etModelPath.setEnabled(enableCustomSettings);
|
||||
etLabelPath.setEnabled(enableCustomSettings);
|
||||
etImagePath.setEnabled(enableCustomSettings);
|
||||
lpCPUThreadNum.setEnabled(enableCustomSettings);
|
||||
lpCPUPowerMode.setEnabled(enableCustomSettings);
|
||||
lpInputColorFormat.setEnabled(enableCustomSettings);
|
||||
etInputShape.setEnabled(enableCustomSettings);
|
||||
etInputMean.setEnabled(enableCustomSettings);
|
||||
etInputStd.setEnabled(enableCustomSettings);
|
||||
etScoreThreshold.setEnabled(enableCustomSettings);
|
||||
modelPath = sharedPreferences.getString(getString(R.string.MODEL_PATH_KEY),
|
||||
getString(R.string.MODEL_PATH_DEFAULT));
|
||||
String labelPath = sharedPreferences.getString(getString(R.string.LABEL_PATH_KEY),
|
||||
getString(R.string.LABEL_PATH_DEFAULT));
|
||||
String imagePath = sharedPreferences.getString(getString(R.string.IMAGE_PATH_KEY),
|
||||
getString(R.string.IMAGE_PATH_DEFAULT));
|
||||
String cpuThreadNum = sharedPreferences.getString(getString(R.string.CPU_THREAD_NUM_KEY),
|
||||
getString(R.string.CPU_THREAD_NUM_DEFAULT));
|
||||
String cpuPowerMode = sharedPreferences.getString(getString(R.string.CPU_POWER_MODE_KEY),
|
||||
getString(R.string.CPU_POWER_MODE_DEFAULT));
|
||||
String inputColorFormat = sharedPreferences.getString(getString(R.string.INPUT_COLOR_FORMAT_KEY),
|
||||
getString(R.string.INPUT_COLOR_FORMAT_DEFAULT));
|
||||
String inputShape = sharedPreferences.getString(getString(R.string.INPUT_SHAPE_KEY),
|
||||
getString(R.string.INPUT_SHAPE_DEFAULT));
|
||||
String inputMean = sharedPreferences.getString(getString(R.string.INPUT_MEAN_KEY),
|
||||
getString(R.string.INPUT_MEAN_DEFAULT));
|
||||
String inputStd = sharedPreferences.getString(getString(R.string.INPUT_STD_KEY),
|
||||
getString(R.string.INPUT_STD_DEFAULT));
|
||||
String scoreThreshold = sharedPreferences.getString(getString(R.string.SCORE_THRESHOLD_KEY),
|
||||
getString(R.string.SCORE_THRESHOLD_DEFAULT));
|
||||
etModelPath.setSummary(modelPath);
|
||||
etModelPath.setText(modelPath);
|
||||
etLabelPath.setSummary(labelPath);
|
||||
etLabelPath.setText(labelPath);
|
||||
etImagePath.setSummary(imagePath);
|
||||
etImagePath.setValue(imagePath);
|
||||
lpCPUThreadNum.setValue(cpuThreadNum);
|
||||
lpCPUThreadNum.setSummary(cpuThreadNum);
|
||||
lpCPUPowerMode.setValue(cpuPowerMode);
|
||||
lpCPUPowerMode.setSummary(cpuPowerMode);
|
||||
lpInputColorFormat.setValue(inputColorFormat);
|
||||
lpInputColorFormat.setSummary(inputColorFormat);
|
||||
etInputShape.setSummary(inputShape);
|
||||
etInputShape.setText(inputShape);
|
||||
etInputMean.setSummary(inputMean);
|
||||
etInputMean.setText(inputMean);
|
||||
etInputStd.setSummary(inputStd);
|
||||
etInputStd.setText(inputStd);
|
||||
etScoreThreshold.setText(scoreThreshold);
|
||||
etScoreThreshold.setSummary(scoreThreshold);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void onResume() {
|
||||
super.onResume();
|
||||
getPreferenceScreen().getSharedPreferences().registerOnSharedPreferenceChangeListener(this);
|
||||
reloadPreferenceAndUpdateUI();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void onPause() {
|
||||
super.onPause();
|
||||
getPreferenceScreen().getSharedPreferences().unregisterOnSharedPreferenceChangeListener(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onSharedPreferenceChanged(SharedPreferences sharedPreferences, String key) {
|
||||
if (key.equals(getString(R.string.CHOOSE_PRE_INSTALLED_MODEL_KEY))) {
|
||||
SharedPreferences.Editor editor = sharedPreferences.edit();
|
||||
editor.putBoolean(getString(R.string.ENABLE_CUSTOM_SETTINGS_KEY), false);
|
||||
editor.commit();
|
||||
}
|
||||
reloadPreferenceAndUpdateUI();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,159 @@
|
|||
package com.baidu.paddle.lite.demo.ocr;
|
||||
|
||||
import android.content.Context;
|
||||
import android.graphics.Bitmap;
|
||||
import android.graphics.Matrix;
|
||||
import android.media.ExifInterface;
|
||||
import android.os.Environment;
|
||||
|
||||
import java.io.*;
|
||||
|
||||
public class Utils {
|
||||
private static final String TAG = Utils.class.getSimpleName();
|
||||
|
||||
public static void copyFileFromAssets(Context appCtx, String srcPath, String dstPath) {
|
||||
if (srcPath.isEmpty() || dstPath.isEmpty()) {
|
||||
return;
|
||||
}
|
||||
InputStream is = null;
|
||||
OutputStream os = null;
|
||||
try {
|
||||
is = new BufferedInputStream(appCtx.getAssets().open(srcPath));
|
||||
os = new BufferedOutputStream(new FileOutputStream(new File(dstPath)));
|
||||
byte[] buffer = new byte[1024];
|
||||
int length = 0;
|
||||
while ((length = is.read(buffer)) != -1) {
|
||||
os.write(buffer, 0, length);
|
||||
}
|
||||
} catch (FileNotFoundException e) {
|
||||
e.printStackTrace();
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
} finally {
|
||||
try {
|
||||
os.close();
|
||||
is.close();
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static void copyDirectoryFromAssets(Context appCtx, String srcDir, String dstDir) {
|
||||
if (srcDir.isEmpty() || dstDir.isEmpty()) {
|
||||
return;
|
||||
}
|
||||
try {
|
||||
if (!new File(dstDir).exists()) {
|
||||
new File(dstDir).mkdirs();
|
||||
}
|
||||
for (String fileName : appCtx.getAssets().list(srcDir)) {
|
||||
String srcSubPath = srcDir + File.separator + fileName;
|
||||
String dstSubPath = dstDir + File.separator + fileName;
|
||||
if (new File(srcSubPath).isDirectory()) {
|
||||
copyDirectoryFromAssets(appCtx, srcSubPath, dstSubPath);
|
||||
} else {
|
||||
copyFileFromAssets(appCtx, srcSubPath, dstSubPath);
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
public static float[] parseFloatsFromString(String string, String delimiter) {
|
||||
String[] pieces = string.trim().toLowerCase().split(delimiter);
|
||||
float[] floats = new float[pieces.length];
|
||||
for (int i = 0; i < pieces.length; i++) {
|
||||
floats[i] = Float.parseFloat(pieces[i].trim());
|
||||
}
|
||||
return floats;
|
||||
}
|
||||
|
||||
public static long[] parseLongsFromString(String string, String delimiter) {
|
||||
String[] pieces = string.trim().toLowerCase().split(delimiter);
|
||||
long[] longs = new long[pieces.length];
|
||||
for (int i = 0; i < pieces.length; i++) {
|
||||
longs[i] = Long.parseLong(pieces[i].trim());
|
||||
}
|
||||
return longs;
|
||||
}
|
||||
|
||||
public static String getSDCardDirectory() {
|
||||
return Environment.getExternalStorageDirectory().getAbsolutePath();
|
||||
}
|
||||
|
||||
public static boolean isSupportedNPU() {
|
||||
return false;
|
||||
// String hardware = android.os.Build.HARDWARE;
|
||||
// return hardware.equalsIgnoreCase("kirin810") || hardware.equalsIgnoreCase("kirin990");
|
||||
}
|
||||
|
||||
public static Bitmap resizeWithStep(Bitmap bitmap, int maxLength, int step) {
|
||||
int width = bitmap.getWidth();
|
||||
int height = bitmap.getHeight();
|
||||
int maxWH = Math.max(width, height);
|
||||
float ratio = 1;
|
||||
int newWidth = width;
|
||||
int newHeight = height;
|
||||
if (maxWH > maxLength) {
|
||||
ratio = maxLength * 1.0f / maxWH;
|
||||
newWidth = (int) Math.floor(ratio * width);
|
||||
newHeight = (int) Math.floor(ratio * height);
|
||||
}
|
||||
|
||||
newWidth = newWidth - newWidth % step;
|
||||
if (newWidth == 0) {
|
||||
newWidth = step;
|
||||
}
|
||||
newHeight = newHeight - newHeight % step;
|
||||
if (newHeight == 0) {
|
||||
newHeight = step;
|
||||
}
|
||||
return Bitmap.createScaledBitmap(bitmap, newWidth, newHeight, true);
|
||||
}
|
||||
|
||||
public static Bitmap rotateBitmap(Bitmap bitmap, int orientation) {
|
||||
|
||||
Matrix matrix = new Matrix();
|
||||
switch (orientation) {
|
||||
case ExifInterface.ORIENTATION_NORMAL:
|
||||
return bitmap;
|
||||
case ExifInterface.ORIENTATION_FLIP_HORIZONTAL:
|
||||
matrix.setScale(-1, 1);
|
||||
break;
|
||||
case ExifInterface.ORIENTATION_ROTATE_180:
|
||||
matrix.setRotate(180);
|
||||
break;
|
||||
case ExifInterface.ORIENTATION_FLIP_VERTICAL:
|
||||
matrix.setRotate(180);
|
||||
matrix.postScale(-1, 1);
|
||||
break;
|
||||
case ExifInterface.ORIENTATION_TRANSPOSE:
|
||||
matrix.setRotate(90);
|
||||
matrix.postScale(-1, 1);
|
||||
break;
|
||||
case ExifInterface.ORIENTATION_ROTATE_90:
|
||||
matrix.setRotate(90);
|
||||
break;
|
||||
case ExifInterface.ORIENTATION_TRANSVERSE:
|
||||
matrix.setRotate(-90);
|
||||
matrix.postScale(-1, 1);
|
||||
break;
|
||||
case ExifInterface.ORIENTATION_ROTATE_270:
|
||||
matrix.setRotate(-90);
|
||||
break;
|
||||
default:
|
||||
return bitmap;
|
||||
}
|
||||
try {
|
||||
Bitmap bmRotated = Bitmap.createBitmap(bitmap, 0, 0, bitmap.getWidth(), bitmap.getHeight(), matrix, true);
|
||||
bitmap.recycle();
|
||||
return bmRotated;
|
||||
}
|
||||
catch (OutOfMemoryError e) {
|
||||
e.printStackTrace();
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,34 @@
|
|||
<vector xmlns:android="http://schemas.android.com/apk/res/android"
|
||||
xmlns:aapt="http://schemas.android.com/aapt"
|
||||
android:width="108dp"
|
||||
android:height="108dp"
|
||||
android:viewportWidth="108"
|
||||
android:viewportHeight="108">
|
||||
<path
|
||||
android:fillType="evenOdd"
|
||||
android:pathData="M32,64C32,64 38.39,52.99 44.13,50.95C51.37,48.37 70.14,49.57 70.14,49.57L108.26,87.69L108,109.01L75.97,107.97L32,64Z"
|
||||
android:strokeWidth="1"
|
||||
android:strokeColor="#00000000">
|
||||
<aapt:attr name="android:fillColor">
|
||||
<gradient
|
||||
android:endX="78.5885"
|
||||
android:endY="90.9159"
|
||||
android:startX="48.7653"
|
||||
android:startY="61.0927"
|
||||
android:type="linear">
|
||||
<item
|
||||
android:color="#44000000"
|
||||
android:offset="0.0" />
|
||||
<item
|
||||
android:color="#00000000"
|
||||
android:offset="1.0" />
|
||||
</gradient>
|
||||
</aapt:attr>
|
||||
</path>
|
||||
<path
|
||||
android:fillColor="#FFFFFF"
|
||||
android:fillType="nonZero"
|
||||
android:pathData="M66.94,46.02L66.94,46.02C72.44,50.07 76,56.61 76,64L32,64C32,56.61 35.56,50.11 40.98,46.06L36.18,41.19C35.45,40.45 35.45,39.3 36.18,38.56C36.91,37.81 38.05,37.81 38.78,38.56L44.25,44.05C47.18,42.57 50.48,41.71 54,41.71C57.48,41.71 60.78,42.57 63.68,44.05L69.11,38.56C69.84,37.81 70.98,37.81 71.71,38.56C72.44,39.3 72.44,40.45 71.71,41.19L66.94,46.02ZM62.94,56.92C64.08,56.92 65,56.01 65,54.88C65,53.76 64.08,52.85 62.94,52.85C61.8,52.85 60.88,53.76 60.88,54.88C60.88,56.01 61.8,56.92 62.94,56.92ZM45.06,56.92C46.2,56.92 47.13,56.01 47.13,54.88C47.13,53.76 46.2,52.85 45.06,52.85C43.92,52.85 43,53.76 43,54.88C43,56.01 43.92,56.92 45.06,56.92Z"
|
||||
android:strokeWidth="1"
|
||||
android:strokeColor="#00000000" />
|
||||
</vector>
|
|
@ -0,0 +1,170 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<vector xmlns:android="http://schemas.android.com/apk/res/android"
|
||||
android:width="108dp"
|
||||
android:height="108dp"
|
||||
android:viewportWidth="108"
|
||||
android:viewportHeight="108">
|
||||
<path
|
||||
android:fillColor="#008577"
|
||||
android:pathData="M0,0h108v108h-108z" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M9,0L9,108"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M19,0L19,108"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M29,0L29,108"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M39,0L39,108"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M49,0L49,108"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M59,0L59,108"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M69,0L69,108"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M79,0L79,108"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M89,0L89,108"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M99,0L99,108"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M0,9L108,9"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M0,19L108,19"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M0,29L108,29"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M0,39L108,39"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M0,49L108,49"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M0,59L108,59"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M0,69L108,69"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M0,79L108,79"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M0,89L108,89"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M0,99L108,99"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M19,29L89,29"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M19,39L89,39"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M19,49L89,49"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M19,59L89,59"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M19,69L89,69"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M19,79L89,79"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M29,19L29,89"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M39,19L39,89"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M49,19L49,89"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M59,19L59,89"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M69,19L69,89"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M79,19L79,89"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
</vector>
|
|
@ -0,0 +1,148 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<androidx.constraintlayout.widget.ConstraintLayout xmlns:android="http://schemas.android.com/apk/res/android"
|
||||
xmlns:app="http://schemas.android.com/apk/res-auto"
|
||||
xmlns:tools="http://schemas.android.com/tools"
|
||||
android:layout_width="match_parent"
|
||||
android:layout_height="match_parent"
|
||||
tools:context=".MainActivity">
|
||||
|
||||
<RelativeLayout
|
||||
android:layout_width="match_parent"
|
||||
android:layout_height="match_parent">
|
||||
|
||||
<LinearLayout
|
||||
android:id="@+id/v_input_info"
|
||||
android:layout_width="fill_parent"
|
||||
android:layout_height="wrap_content"
|
||||
android:layout_alignParentTop="true"
|
||||
android:orientation="vertical">
|
||||
|
||||
<LinearLayout
|
||||
android:id="@+id/btn_layout"
|
||||
android:layout_width="fill_parent"
|
||||
android:layout_height="wrap_content"
|
||||
android:orientation="horizontal">
|
||||
|
||||
<Button
|
||||
android:id="@+id/btn_load_model"
|
||||
android:layout_width="0dp"
|
||||
android:layout_height="wrap_content"
|
||||
android:layout_weight="1"
|
||||
android:onClick="btn_load_model_click"
|
||||
android:text="加载模型" />
|
||||
<Button
|
||||
android:id="@+id/btn_run_model"
|
||||
android:layout_width="0dp"
|
||||
android:layout_height="wrap_content"
|
||||
android:layout_weight="1"
|
||||
android:onClick="btn_run_model_click"
|
||||
android:text="运行模型" />
|
||||
<Button
|
||||
android:id="@+id/btn_take_photo"
|
||||
android:layout_width="0dp"
|
||||
android:layout_height="wrap_content"
|
||||
android:layout_weight="1"
|
||||
android:onClick="btn_take_photo_click"
|
||||
android:text="拍照识别" />
|
||||
<Button
|
||||
android:id="@+id/btn_choice_img"
|
||||
android:layout_width="0dp"
|
||||
android:layout_height="wrap_content"
|
||||
android:layout_weight="1"
|
||||
android:onClick="btn_choice_img_click"
|
||||
android:text="选取图片" />
|
||||
|
||||
</LinearLayout>
|
||||
<TextView
|
||||
android:id="@+id/tv_input_setting"
|
||||
android:layout_width="wrap_content"
|
||||
android:layout_height="wrap_content"
|
||||
android:scrollbars="vertical"
|
||||
android:layout_marginLeft="12dp"
|
||||
android:layout_marginRight="12dp"
|
||||
android:layout_marginTop="10dp"
|
||||
android:layout_marginBottom="5dp"
|
||||
android:lineSpacingExtra="4dp"
|
||||
android:singleLine="false"
|
||||
android:maxLines="6"
|
||||
android:text=""/>
|
||||
<TextView
|
||||
android:id="@+id/tv_model_img_status"
|
||||
android:layout_width="wrap_content"
|
||||
android:layout_height="wrap_content"
|
||||
android:scrollbars="vertical"
|
||||
android:layout_marginLeft="12dp"
|
||||
android:layout_marginRight="12dp"
|
||||
android:layout_marginTop="-5dp"
|
||||
android:layout_marginBottom="5dp"
|
||||
android:lineSpacingExtra="4dp"
|
||||
android:singleLine="false"
|
||||
android:maxLines="6"
|
||||
android:text="STATUS: ok"/>
|
||||
|
||||
</LinearLayout>
|
||||
|
||||
<RelativeLayout
|
||||
android:layout_width="match_parent"
|
||||
android:layout_height="match_parent"
|
||||
android:layout_above="@+id/v_output_info"
|
||||
android:layout_below="@+id/v_input_info">
|
||||
|
||||
<ImageView
|
||||
android:id="@+id/iv_input_image"
|
||||
android:layout_width="400dp"
|
||||
android:layout_height="400dp"
|
||||
android:layout_centerHorizontal="true"
|
||||
android:layout_centerVertical="true"
|
||||
android:layout_marginLeft="12dp"
|
||||
android:layout_marginRight="12dp"
|
||||
android:layout_marginTop="5dp"
|
||||
android:layout_marginBottom="5dp"
|
||||
android:adjustViewBounds="true"
|
||||
android:scaleType="fitCenter"/>
|
||||
</RelativeLayout>
|
||||
|
||||
|
||||
<RelativeLayout
|
||||
android:id="@+id/v_output_info"
|
||||
android:layout_width="wrap_content"
|
||||
android:layout_height="wrap_content"
|
||||
android:layout_alignParentBottom="true"
|
||||
android:layout_centerHorizontal="true">
|
||||
|
||||
<TextView
|
||||
android:id="@+id/tv_output_result"
|
||||
android:layout_width="wrap_content"
|
||||
android:layout_height="wrap_content"
|
||||
android:layout_alignParentTop="true"
|
||||
android:layout_centerHorizontal="true"
|
||||
android:layout_centerVertical="true"
|
||||
android:scrollbars="vertical"
|
||||
android:layout_marginLeft="12dp"
|
||||
android:layout_marginRight="12dp"
|
||||
android:layout_marginTop="5dp"
|
||||
android:layout_marginBottom="5dp"
|
||||
android:textAlignment="center"
|
||||
android:lineSpacingExtra="5dp"
|
||||
android:singleLine="false"
|
||||
android:maxLines="5"
|
||||
android:text=""/>
|
||||
|
||||
<TextView
|
||||
android:id="@+id/tv_inference_time"
|
||||
android:layout_width="wrap_content"
|
||||
android:layout_height="wrap_content"
|
||||
android:layout_below="@+id/tv_output_result"
|
||||
android:layout_centerHorizontal="true"
|
||||
android:layout_centerVertical="true"
|
||||
android:textAlignment="center"
|
||||
android:layout_marginLeft="12dp"
|
||||
android:layout_marginRight="12dp"
|
||||
android:layout_marginTop="5dp"
|
||||
android:layout_marginBottom="10dp"
|
||||
android:text=""/>
|
||||
|
||||
</RelativeLayout>
|
||||
|
||||
</RelativeLayout>
|
||||
</androidx.constraintlayout.widget.ConstraintLayout>
|
|
@ -0,0 +1,46 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<!-- for MiniActivity Use Only -->
|
||||
<androidx.constraintlayout.widget.ConstraintLayout xmlns:android="http://schemas.android.com/apk/res/android"
|
||||
xmlns:app="http://schemas.android.com/apk/res-auto"
|
||||
xmlns:tools="http://schemas.android.com/tools"
|
||||
android:layout_width="match_parent"
|
||||
android:layout_height="match_parent"
|
||||
app:layout_constraintLeft_toLeftOf="parent"
|
||||
app:layout_constraintLeft_toRightOf="parent"
|
||||
tools:context=".MainActivity">
|
||||
|
||||
<TextView
|
||||
android:id="@+id/sample_text"
|
||||
android:layout_width="0dp"
|
||||
android:layout_height="wrap_content"
|
||||
android:text="Hello World!"
|
||||
app:layout_constraintLeft_toLeftOf="parent"
|
||||
app:layout_constraintRight_toRightOf="parent"
|
||||
app:layout_constraintTop_toBottomOf="@id/imageView"
|
||||
android:scrollbars="vertical"
|
||||
/>
|
||||
|
||||
<ImageView
|
||||
android:id="@+id/imageView"
|
||||
android:layout_width="wrap_content"
|
||||
android:layout_height="wrap_content"
|
||||
android:paddingTop="20dp"
|
||||
android:paddingBottom="20dp"
|
||||
app:layout_constraintBottom_toTopOf="@id/imageView"
|
||||
app:layout_constraintLeft_toLeftOf="parent"
|
||||
app:layout_constraintRight_toRightOf="parent"
|
||||
app:layout_constraintTop_toTopOf="parent"
|
||||
tools:srcCompat="@tools:sample/avatars" />
|
||||
|
||||
<Button
|
||||
android:id="@+id/button"
|
||||
android:layout_width="wrap_content"
|
||||
android:layout_height="wrap_content"
|
||||
android:layout_marginBottom="4dp"
|
||||
android:text="Button"
|
||||
app:layout_constraintBottom_toBottomOf="parent"
|
||||
app:layout_constraintLeft_toLeftOf="parent"
|
||||
app:layout_constraintRight_toRightOf="parent"
|
||||
tools:layout_editor_absoluteX="161dp" />
|
||||
|
||||
</androidx.constraintlayout.widget.ConstraintLayout>
|
|
@ -0,0 +1,10 @@
|
|||
<menu xmlns:android="http://schemas.android.com/apk/res/android"
|
||||
xmlns:app="http://schemas.android.com/apk/res-auto">
|
||||
|
||||
<group>
|
||||
<item
|
||||
android:id="@+id/settings"
|
||||
android:title="Settings..."
|
||||
app:showAsAction="withText"/>
|
||||
</group>
|
||||
</menu>
|
|
@ -0,0 +1,5 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
|
||||
<background android:drawable="@drawable/ic_launcher_background" />
|
||||
<foreground android:drawable="@drawable/ic_launcher_foreground" />
|
||||
</adaptive-icon>
|
|
@ -0,0 +1,5 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
|
||||
<background android:drawable="@drawable/ic_launcher_background" />
|
||||
<foreground android:drawable="@drawable/ic_launcher_foreground" />
|
||||
</adaptive-icon>
|
After Width: | Height: | Size: 2.9 KiB |
After Width: | Height: | Size: 4.8 KiB |