Merge remote-tracking branch 'origin/lite' into lite
This commit is contained in:
commit
f7081e3883
|
@ -206,7 +206,7 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
self.labelList = EditInList()
|
||||
labelListContainer = QWidget()
|
||||
labelListContainer.setLayout(listLayout)
|
||||
self.labelList.itemActivated.connect(self.labelSelectionChanged)
|
||||
#self.labelList.itemActivated.connect(self.labelSelectionChanged)
|
||||
self.labelList.itemSelectionChanged.connect(self.labelSelectionChanged)
|
||||
self.labelList.clicked.connect(self.labelList.item_clicked)
|
||||
# Connect to itemChanged to detect checkbox changes.
|
||||
|
@ -219,7 +219,7 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
################## detection box ####################
|
||||
self.BoxList = QListWidget()
|
||||
|
||||
self.BoxList.itemActivated.connect(self.boxSelectionChanged)
|
||||
#self.BoxList.itemActivated.connect(self.boxSelectionChanged)
|
||||
self.BoxList.itemSelectionChanged.connect(self.boxSelectionChanged)
|
||||
self.BoxList.itemDoubleClicked.connect(self.editBox)
|
||||
# Connect to itemChanged to detect checkbox changes.
|
||||
|
@ -435,7 +435,7 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
|
||||
######## New actions #######
|
||||
AutoRec = action(getStr('autoRecognition'), self.autoRecognition,
|
||||
'Ctrl+Shift+A', 'Auto', getStr('autoRecognition'), enabled=False)
|
||||
'', 'Auto', getStr('autoRecognition'), enabled=False)
|
||||
|
||||
reRec = action(getStr('reRecognition'), self.reRecognition,
|
||||
'Ctrl+Shift+R', 'reRec', getStr('reRecognition'), enabled=False)
|
||||
|
@ -444,7 +444,7 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
'Ctrl+R', 'reRec', getStr('singleRe'), enabled=False)
|
||||
|
||||
createpoly = action(getStr('creatPolygon'), self.createPolygon,
|
||||
'q', 'new', 'Creat Polygon', enabled=True)
|
||||
'q', 'new', getStr('creatPolygon'), enabled=True)
|
||||
|
||||
saveRec = action(getStr('saveRec'), self.saveRecResult,
|
||||
'', 'save', getStr('saveRec'), enabled=False)
|
||||
|
@ -452,6 +452,12 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
saveLabel = action(getStr('saveLabel'), self.saveLabelFile, #
|
||||
'Ctrl+S', 'save', getStr('saveLabel'), enabled=False)
|
||||
|
||||
undoLastPoint = action(getStr("undoLastPoint"), self.canvas.undoLastPoint,
|
||||
'Ctrl+Z', "undo", getStr("undoLastPoint"), enabled=False)
|
||||
|
||||
undo = action(getStr("undo"), self.undoShapeEdit,
|
||||
'Ctrl+Z', "undo", getStr("undo"), enabled=False)
|
||||
|
||||
self.editButton.setDefaultAction(edit)
|
||||
self.newButton.setDefaultAction(create)
|
||||
self.DelButton.setDefaultAction(deleteImg)
|
||||
|
@ -512,10 +518,11 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
zoom=zoom, zoomIn=zoomIn, zoomOut=zoomOut, zoomOrg=zoomOrg,
|
||||
fitWindow=fitWindow, fitWidth=fitWidth,
|
||||
zoomActions=zoomActions, saveLabel=saveLabel,
|
||||
undo=undo, undoLastPoint=undoLastPoint,
|
||||
fileMenuActions=(
|
||||
opendir, saveLabel, resetAll, quit),
|
||||
beginner=(), advanced=(),
|
||||
editMenu=(createpoly, edit, copy, delete,singleRere,
|
||||
editMenu=(createpoly, edit, copy, delete,singleRere,None, undo, undoLastPoint,
|
||||
None, color1, self.drawSquaresOption),
|
||||
beginnerContext=(create, edit, copy, delete, singleRere),
|
||||
advancedContext=(createMode, editMode, edit, copy,
|
||||
|
@ -549,8 +556,13 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
self.labelDialogOption.setChecked(settings.get(SETTING_PAINT_LABEL, False))
|
||||
self.labelDialogOption.triggered.connect(self.speedChoose)
|
||||
|
||||
self.autoSaveOption = QAction(getStr('autoSaveMode'), self)
|
||||
self.autoSaveOption.setCheckable(True)
|
||||
self.autoSaveOption.setChecked(settings.get(SETTING_PAINT_LABEL, False))
|
||||
self.autoSaveOption.triggered.connect(self.autoSaveFunc)
|
||||
|
||||
addActions(self.menus.file,
|
||||
(opendir, None, saveLabel, saveRec, None, resetAll, deleteImg, quit))
|
||||
(opendir, None, saveLabel, saveRec, self.autoSaveOption, None, resetAll, deleteImg, quit))
|
||||
|
||||
addActions(self.menus.help, (showSteps, showInfo))
|
||||
addActions(self.menus.view, (
|
||||
|
@ -566,9 +578,9 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
|
||||
# Custom context menu for the canvas widget:
|
||||
addActions(self.canvas.menus[0], self.actions.beginnerContext)
|
||||
addActions(self.canvas.menus[1], (
|
||||
action('&Copy here', self.copyShape),
|
||||
action('&Move here', self.moveShape)))
|
||||
#addActions(self.canvas.menus[1], (
|
||||
# action('&Copy here', self.copyShape),
|
||||
# action('&Move here', self.moveShape)))
|
||||
|
||||
|
||||
self.statusBar().showMessage('%s started.' % __appname__)
|
||||
|
@ -758,6 +770,7 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
self.canvas.setEditing(False)
|
||||
self.canvas.fourpoint = True
|
||||
self.actions.create.setEnabled(False)
|
||||
self.actions.undoLastPoint.setEnabled(True)
|
||||
|
||||
def toggleDrawingSensitive(self, drawing=True):
|
||||
"""In the middle of drawing, toggling between modes should be disabled."""
|
||||
|
@ -866,10 +879,11 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
self.updateComboBox()
|
||||
|
||||
def updateBoxlist(self):
|
||||
shape = self.canvas.selectedShape
|
||||
item = self.shapesToItemsbox[shape] # listitem
|
||||
text = [(int(p.x()), int(p.y())) for p in shape.points]
|
||||
item.setText(str(text))
|
||||
for shape in self.canvas.selectedShapes+[self.canvas.hShape]:
|
||||
item = self.shapesToItemsbox[shape] # listitem
|
||||
text = [(int(p.x()), int(p.y())) for p in shape.points]
|
||||
item.setText(str(text))
|
||||
self.actions.undo.setEnabled(True)
|
||||
self.setDirty()
|
||||
|
||||
def indexTo5Files(self, currIndex):
|
||||
|
@ -902,23 +916,27 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
if len(self.mImgList) > 0:
|
||||
self.zoomWidget.setValue(self.zoomWidgetValue + self.imgsplider.value())
|
||||
|
||||
# React to canvas signals.
|
||||
def shapeSelectionChanged(self, selected=False):
|
||||
if self._noSelectionSlot:
|
||||
self._noSelectionSlot = False
|
||||
else:
|
||||
shape = self.canvas.selectedShape
|
||||
if shape:
|
||||
self.shapesToItems[shape].setSelected(True)
|
||||
self.shapesToItemsbox[shape].setSelected(True) # ADD
|
||||
else:
|
||||
self.labelList.clearSelection()
|
||||
self.actions.delete.setEnabled(selected)
|
||||
self.actions.copy.setEnabled(selected)
|
||||
self.actions.edit.setEnabled(selected)
|
||||
self.actions.shapeLineColor.setEnabled(selected)
|
||||
self.actions.shapeFillColor.setEnabled(selected)
|
||||
self.actions.singleRere.setEnabled(selected)
|
||||
|
||||
def shapeSelectionChanged(self, selected_shapes):
|
||||
self._noSelectionSlot = True
|
||||
for shape in self.canvas.selectedShapes:
|
||||
shape.selected = False
|
||||
self.labelList.clearSelection()
|
||||
self.canvas.selectedShapes = selected_shapes
|
||||
for shape in self.canvas.selectedShapes:
|
||||
shape.selected = True
|
||||
self.shapesToItems[shape].setSelected(True)
|
||||
self.shapesToItemsbox[shape].setSelected(True)
|
||||
|
||||
self.labelList.scrollToItem(self.currentItem()) # QAbstractItemView.EnsureVisible
|
||||
self.BoxList.scrollToItem(self.currentBox())
|
||||
|
||||
self._noSelectionSlot = False
|
||||
n_selected = len(selected_shapes)
|
||||
self.actions.singleRere.setEnabled(n_selected)
|
||||
self.actions.delete.setEnabled(n_selected)
|
||||
self.actions.copy.setEnabled(n_selected)
|
||||
self.actions.edit.setEnabled(n_selected == 1)
|
||||
|
||||
def addLabel(self, shape):
|
||||
shape.paintLabel = self.displayLabelOption.isChecked()
|
||||
|
@ -941,22 +959,23 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
action.setEnabled(True)
|
||||
self.updateComboBox()
|
||||
|
||||
def remLabel(self, shape):
|
||||
if shape is None:
|
||||
def remLabels(self, shapes):
|
||||
if shapes is None:
|
||||
# print('rm empty label')
|
||||
return
|
||||
item = self.shapesToItems[shape]
|
||||
self.labelList.takeItem(self.labelList.row(item))
|
||||
del self.shapesToItems[shape]
|
||||
del self.itemsToShapes[item]
|
||||
self.updateComboBox()
|
||||
for shape in shapes:
|
||||
item = self.shapesToItems[shape]
|
||||
self.labelList.takeItem(self.labelList.row(item))
|
||||
del self.shapesToItems[shape]
|
||||
del self.itemsToShapes[item]
|
||||
self.updateComboBox()
|
||||
|
||||
# ADD:
|
||||
item = self.shapesToItemsbox[shape]
|
||||
self.BoxList.takeItem(self.BoxList.row(item))
|
||||
del self.shapesToItemsbox[shape]
|
||||
del self.itemsToShapesbox[item]
|
||||
self.updateComboBox()
|
||||
# ADD:
|
||||
item = self.shapesToItemsbox[shape]
|
||||
self.BoxList.takeItem(self.BoxList.row(item))
|
||||
del self.shapesToItemsbox[shape]
|
||||
del self.itemsToShapesbox[item]
|
||||
self.updateComboBox()
|
||||
|
||||
def loadLabels(self, shapes):
|
||||
s = []
|
||||
|
@ -1001,7 +1020,7 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
item.setText(str([(int(p.x()), int(p.y())) for p in shape.points]))
|
||||
self.updateComboBox()
|
||||
|
||||
def updateComboBox(self):
|
||||
def updateComboBox(self): # TODO:貌似没用
|
||||
# Get the unique labels and add them to the Combobox.
|
||||
itemsTextList = [str(self.labelList.item(i).text()) for i in range(self.labelList.count())]
|
||||
|
||||
|
@ -1031,7 +1050,7 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
|
||||
for box in self.result_dic:
|
||||
trans_dic = {"label": box[1][0], "points": box[0], 'difficult': False}
|
||||
if trans_dic["label"] is "" and mode == 'Auto':
|
||||
if trans_dic["label"] == "" and mode == 'Auto':
|
||||
continue
|
||||
shapes.append(trans_dic)
|
||||
|
||||
|
@ -1054,26 +1073,38 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
return False
|
||||
|
||||
def copySelectedShape(self):
|
||||
self.addLabel(self.canvas.copySelectedShape())
|
||||
for shape in self.canvas.copySelectedShape():
|
||||
self.addLabel(shape)
|
||||
# fix copy and delete
|
||||
self.shapeSelectionChanged(True)
|
||||
#self.shapeSelectionChanged(True)
|
||||
|
||||
|
||||
def labelSelectionChanged(self):
|
||||
item = self.currentItem()
|
||||
self.labelList.scrollToItem(item, QAbstractItemView.EnsureVisible)
|
||||
if item and self.canvas.editing():
|
||||
self._noSelectionSlot = True
|
||||
self.canvas.selectShape(self.itemsToShapes[item])
|
||||
shape = self.itemsToShapes[item]
|
||||
if self._noSelectionSlot:
|
||||
return
|
||||
if self.canvas.editing():
|
||||
selected_shapes = []
|
||||
for item in self.labelList.selectedItems():
|
||||
selected_shapes.append(self.itemsToShapes[item])
|
||||
if selected_shapes:
|
||||
self.canvas.selectShapes(selected_shapes)
|
||||
else:
|
||||
self.canvas.deSelectShape()
|
||||
|
||||
|
||||
def boxSelectionChanged(self):
|
||||
item = self.currentBox()
|
||||
self.BoxList.scrollToItem(item, QAbstractItemView.EnsureVisible)
|
||||
if item and self.canvas.editing():
|
||||
self._noSelectionSlot = True
|
||||
self.canvas.selectShape(self.itemsToShapesbox[item])
|
||||
shape = self.itemsToShapesbox[item]
|
||||
if self._noSelectionSlot:
|
||||
#self.BoxList.scrollToItem(self.currentBox(), QAbstractItemView.PositionAtCenter)
|
||||
return
|
||||
if self.canvas.editing():
|
||||
selected_shapes = []
|
||||
for item in self.BoxList.selectedItems():
|
||||
selected_shapes.append(self.itemsToShapesbox[item])
|
||||
if selected_shapes:
|
||||
self.canvas.selectShapes(selected_shapes)
|
||||
else:
|
||||
self.canvas.deSelectShape()
|
||||
|
||||
|
||||
def labelItemChanged(self, item):
|
||||
shape = self.itemsToShapes[item]
|
||||
|
@ -1113,6 +1144,8 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
if self.beginner(): # Switch to edit mode.
|
||||
self.canvas.setEditing(True)
|
||||
self.actions.create.setEnabled(True)
|
||||
self.actions.undoLastPoint.setEnabled(False)
|
||||
self.actions.undo.setEnabled(True)
|
||||
else:
|
||||
self.actions.editMode.setEnabled(True)
|
||||
self.setDirty()
|
||||
|
@ -1450,7 +1483,7 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
item = QListWidgetItem(closeicon, filename)
|
||||
self.fileListWidget.addItem(item)
|
||||
|
||||
print('dirPath in importDirImages is', dirpath)
|
||||
print('DirPath in importDirImages is', dirpath)
|
||||
self.iconlist.clear()
|
||||
self.additems5(dirpath)
|
||||
self.changeFileFolder = True
|
||||
|
@ -1459,7 +1492,6 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
self.reRecogButton.setEnabled(True)
|
||||
self.actions.AutoRec.setEnabled(True)
|
||||
self.actions.reRec.setEnabled(True)
|
||||
self.actions.saveLabel.setEnabled(True)
|
||||
|
||||
|
||||
def openPrevImg(self, _value=False):
|
||||
|
@ -1549,6 +1581,7 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
self.fileListWidget.insertItem(int(currIndex), item)
|
||||
self.openNextImg()
|
||||
self.actions.saveRec.setEnabled(True)
|
||||
self.actions.saveLabel.setEnabled(True)
|
||||
|
||||
elif mode == 'Auto':
|
||||
if annotationFilePath and self.saveLabels(annotationFilePath, mode=mode):
|
||||
|
@ -1644,7 +1677,8 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
self.setDirty()
|
||||
|
||||
def deleteSelectedShape(self):
|
||||
self.remLabel(self.canvas.deleteSelected())
|
||||
self.remLabels(self.canvas.deleteSelected())
|
||||
self.actions.undo.setEnabled(True)
|
||||
self.setDirty()
|
||||
if self.noShapes():
|
||||
for action in self.actions.onShapesPresent:
|
||||
|
@ -1654,7 +1688,7 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
color = self.colorDialog.getColor(self.lineColor, u'Choose line color',
|
||||
default=DEFAULT_LINE_COLOR)
|
||||
if color:
|
||||
self.canvas.selectedShape.line_color = color
|
||||
for shape in self.canvas.selectedShapes: shape.line_color = color
|
||||
self.canvas.update()
|
||||
self.setDirty()
|
||||
|
||||
|
@ -1662,7 +1696,7 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
color = self.colorDialog.getColor(self.fillColor, u'Choose fill color',
|
||||
default=DEFAULT_FILL_COLOR)
|
||||
if color:
|
||||
self.canvas.selectedShape.fill_color = color
|
||||
for shape in self.canvas.selectedShapes: shape.fill_color = color
|
||||
self.canvas.update()
|
||||
self.setDirty()
|
||||
|
||||
|
@ -1764,7 +1798,7 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
QMessageBox.information(self, "Information", msg)
|
||||
return
|
||||
result = self.ocr.ocr(img_crop, cls=True, det=False)
|
||||
if result[0][0] is not '':
|
||||
if result[0][0] != '':
|
||||
result.insert(0, box)
|
||||
print('result in reRec is ', result)
|
||||
self.result_dic.append(result)
|
||||
|
@ -1786,25 +1820,25 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
|
||||
def singleRerecognition(self):
|
||||
img = cv2.imread(self.filePath)
|
||||
shape = self.canvas.selectedShape
|
||||
box = [[int(p.x()), int(p.y())] for p in shape.points]
|
||||
assert len(box) == 4
|
||||
img_crop = get_rotate_crop_image(img, np.array(box, np.float32))
|
||||
if img_crop is None:
|
||||
msg = 'Can not recognise the detection box in ' + self.filePath + '. Please change manually'
|
||||
QMessageBox.information(self, "Information", msg)
|
||||
return
|
||||
result = self.ocr.ocr(img_crop, cls=True, det=False)
|
||||
if result[0][0] is not '':
|
||||
result.insert(0, box)
|
||||
print('result in reRec is ', result)
|
||||
if result[1][0] == shape.label:
|
||||
print('label no change')
|
||||
else:
|
||||
shape.label = result[1][0]
|
||||
self.singleLabel(shape)
|
||||
self.setDirty()
|
||||
print(box)
|
||||
for shape in self.canvas.selectedShapes:
|
||||
box = [[int(p.x()), int(p.y())] for p in shape.points]
|
||||
assert len(box) == 4
|
||||
img_crop = get_rotate_crop_image(img, np.array(box, np.float32))
|
||||
if img_crop is None:
|
||||
msg = 'Can not recognise the detection box in ' + self.filePath + '. Please change manually'
|
||||
QMessageBox.information(self, "Information", msg)
|
||||
return
|
||||
result = self.ocr.ocr(img_crop, cls=True, det=False)
|
||||
if result[0][0] != '':
|
||||
result.insert(0, box)
|
||||
print('result in reRec is ', result)
|
||||
if result[1][0] == shape.label:
|
||||
print('label no change')
|
||||
else:
|
||||
shape.label = result[1][0]
|
||||
self.singleLabel(shape)
|
||||
self.setDirty()
|
||||
print(box)
|
||||
|
||||
def autolcm(self):
|
||||
vbox = QVBoxLayout()
|
||||
|
@ -1862,6 +1896,8 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
for each in states:
|
||||
file, state = each.split('\t')
|
||||
self.fileStatedict[file] = 1
|
||||
self.actions.saveLabel.setEnabled(True)
|
||||
self.actions.saveRec.setEnabled(True)
|
||||
|
||||
|
||||
def saveFilestate(self):
|
||||
|
@ -1913,28 +1949,35 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
self.savePPlabel()
|
||||
|
||||
def saveRecResult(self):
|
||||
if None in [self.PPlabelpath, self.PPlabel, self.fileStatedict]:
|
||||
QMessageBox.information(self, "Information", "Save file first")
|
||||
if {} in [self.PPlabelpath, self.PPlabel, self.fileStatedict]:
|
||||
QMessageBox.information(self, "Information", "Check the image first")
|
||||
return
|
||||
|
||||
rec_gt_dir = os.path.dirname(self.PPlabelpath) + '/rec_gt.txt'
|
||||
crop_img_dir = os.path.dirname(self.PPlabelpath) + '/crop_img/'
|
||||
ques_img = []
|
||||
if not os.path.exists(crop_img_dir):
|
||||
os.mkdir(crop_img_dir)
|
||||
|
||||
with open(rec_gt_dir, 'w', encoding='utf-8') as f:
|
||||
for key in self.fileStatedict:
|
||||
idx = self.getImglabelidx(key)
|
||||
for i, label in enumerate(self.PPlabel[idx]):
|
||||
if label['difficult']: continue
|
||||
try:
|
||||
img = cv2.imread(key)
|
||||
img_crop = get_rotate_crop_image(img, np.array(label['points'], np.float32))
|
||||
img_name = os.path.splitext(os.path.basename(idx))[0] + '_crop_'+str(i)+'.jpg'
|
||||
cv2.imwrite(crop_img_dir+img_name, img_crop)
|
||||
f.write('crop_img/'+ img_name + '\t')
|
||||
f.write(label['transcription'] + '\n')
|
||||
|
||||
QMessageBox.information(self, "Information", "Cropped images has been saved in "+str(crop_img_dir))
|
||||
for i, label in enumerate(self.PPlabel[idx]):
|
||||
if label['difficult']: continue
|
||||
img_crop = get_rotate_crop_image(img, np.array(label['points'], np.float32))
|
||||
img_name = os.path.splitext(os.path.basename(idx))[0] + '_crop_'+str(i)+'.jpg'
|
||||
cv2.imwrite(crop_img_dir+img_name, img_crop)
|
||||
f.write('crop_img/'+ img_name + '\t')
|
||||
f.write(label['transcription'] + '\n')
|
||||
except Exception as e:
|
||||
ques_img.append(key)
|
||||
print("Can not read image ",e)
|
||||
if ques_img:
|
||||
QMessageBox.information(self, "Information", "The following images can not be saved, "
|
||||
"please check the image path and labels.\n" + "".join(str(i)+'\n' for i in ques_img))
|
||||
QMessageBox.information(self, "Information", "Cropped images have been saved in "+str(crop_img_dir))
|
||||
|
||||
def speedChoose(self):
|
||||
if self.labelDialogOption.isChecked():
|
||||
|
@ -1945,6 +1988,33 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
self.canvas.newShape.disconnect()
|
||||
self.canvas.newShape.connect(partial(self.newShape, False))
|
||||
|
||||
def autoSaveFunc(self):
|
||||
if self.autoSaveOption.isChecked():
|
||||
self.autoSaveNum = 1 # Real auto_Save
|
||||
try:
|
||||
self.saveLabelFile()
|
||||
except:
|
||||
pass
|
||||
print('The program will automatically save once after confirming an image')
|
||||
else:
|
||||
self.autoSaveNum = 5 # Used for backup
|
||||
print('The program will automatically save once after confirming 5 images (default)')
|
||||
|
||||
def undoShapeEdit(self):
|
||||
self.canvas.restoreShape()
|
||||
self.labelList.clear()
|
||||
self.BoxList.clear()
|
||||
self.loadShapes(self.canvas.shapes)
|
||||
self.actions.undo.setEnabled(self.canvas.isShapeRestorable)
|
||||
|
||||
def loadShapes(self, shapes, replace=True):
|
||||
self._noSelectionSlot = True
|
||||
for shape in shapes:
|
||||
self.addLabel(shape)
|
||||
self.labelList.clearSelection()
|
||||
self._noSelectionSlot = False
|
||||
self.canvas.loadShapes(shapes, replace=replace)
|
||||
|
||||
|
||||
def inverted(color):
|
||||
return QColor(*[255 - v for v in color.getRgb()])
|
||||
|
@ -1991,7 +2061,7 @@ if __name__ == '__main__':
|
|||
resource_file = './libs/resources.py'
|
||||
if not os.path.exists(resource_file):
|
||||
output = os.system('pyrcc5 -o libs/resources.py resources.qrc')
|
||||
assert output is 0, "operate the cmd have some problems ,please check whether there is a in the lib " \
|
||||
assert output == 0, "operate the cmd have some problems ,please check whether there is a in the lib " \
|
||||
"directory resources.py "
|
||||
import libs.resources
|
||||
sys.exit(main())
|
||||
|
|
|
@ -8,15 +8,18 @@ PPOCRLabel is a semi-automatic graphic annotation tool suitable for OCR field, w
|
|||
|
||||
### Recent Update
|
||||
|
||||
- 2021.2.5: New batch processing and undo functions (by [Evezerest](https://github.com/Evezerest)):
|
||||
- Batch processing function: Press and hold the Ctrl key while selecting boxes to move, copy, or delete them in batches.
|
||||
- Undo function: In the process of drawing a four-point label box or after editing the box, press Ctrl+Z to undo the previous operation.
|
||||
- Fix image rotation and size problems, optimize the process of editing the label box (by [ninetailskim](https://github.com/ninetailskim), [edencfc](https://github.com/edencfc)).
|
||||
- 2021.1.11: Optimize the labeling experience (by [edencfc](https://github.com/edencfc)):
|
||||
- Users can choose whether to pop up the label input dialog after drawing the detection box in "View - Pop-up Label Input Dialog".
|
||||
- Users can choose whether to pop up the label input dialog after drawing the detection box in "View - Pop-up Label Input Dialog".
|
||||
- The recognition result scrolls synchronously when users click related detection box.
|
||||
- Click to modify the recognition result. (If you can't change the result, please switch to the system default input method, or switch back to the original input method again.)
|
||||
- 2020.12.18: Support re-recognition of a single label box (by [ninetailskim](https://github.com/ninetailskim)), improved shortcut keys.
|
||||
|
||||
### TODO:
|
||||
- Lock box mode: For the same scene data, the size and position of the locked detection box can be transferred between different pictures.
|
||||
- Experience optimization: Add undo, batch operation include move, copy, delete and so on, optimize the annotation process.
|
||||
|
||||
## Installation
|
||||
|
||||
|
@ -49,7 +52,7 @@ python3 PPOCRLabel.py
|
|||
```
|
||||
pip3 install pyqt5
|
||||
pip3 uninstall opencv-python # Uninstall opencv manually as it conflicts with pyqt
|
||||
pip3 install opencv-contrib-python-headless # Install the headless version of opencv
|
||||
pip3 install opencv-contrib-python-headless==4.2.0.32 # Install the headless version of opencv
|
||||
cd ./PPOCRLabel # Change the directory to the PPOCRLabel folder
|
||||
python3 PPOCRLabel.py
|
||||
```
|
||||
|
@ -76,12 +79,11 @@ python3 PPOCRLabel.py
|
|||
|
||||
7. Double click the result in 'recognition result' list to manually change inaccurate recognition results.
|
||||
|
||||
8. Click "Check", the image status will switch to "√",then the program automatically jump to the next(The results will not be written directly to the file at this time).
|
||||
8. Click "Check": the image status will switch to "√", and the program will automatically jump to the next image.
|
||||
|
||||
9. Click "Delete Image" and the image will be deleted to the recycle bin.
|
||||
|
||||
10. Labeling result: the user can save manually through the menu "File - Save Label", while the program will also save automatically after every 5 images confirmed by the user.the manually checked label will be stored in *Label.txt* under the opened picture folder.
|
||||
Click "PaddleOCR"-"Save Recognition Results" in the menu bar, the recognition training data of such pictures will be saved in the *crop_img* folder, and the recognition label will be saved in *rec_gt.txt*<sup>[4]</sup>.
|
||||
10. Labeling result: the user can save manually through the menu "File - Save Label", while the program will also save automatically if "File - Auto Save Label Mode" is selected. The manually checked labels will be stored in *Label.txt* under the opened picture folder. After clicking "PaddleOCR" - "Save Recognition Results" in the menu bar, the recognition training data of such pictures will be saved in the *crop_img* folder, and the recognition labels will be saved in *rec_gt.txt*<sup>[4]</sup>.
|
||||
|
||||
### Note
|
||||
|
||||
|
@ -89,8 +91,7 @@ python3 PPOCRLabel.py
|
|||
|
||||
[2] The image status indicates whether the user has saved the image manually. If it has not been saved manually it is "X", otherwise it is "√". PPOCRLabel will not relabel pictures with a status of "√".
|
||||
|
||||
[3] After clicking "Re-recognize", the model will overwrite ALL recognition results in the picture.
|
||||
Therefore, if the recognition result has been manually changed before, it may change after re-recognition.
|
||||
[3] After clicking "Re-recognize", the model will overwrite ALL recognition results in the picture. Therefore, if the recognition result has been manually changed before, it may change after re-recognition.
|
||||
|
||||
[4] The files produced by PPOCRLabel can be found under the opened picture folder including the following, please do not manually change the contents, otherwise it will cause the program to be abnormal.
|
||||
|
||||
|
@ -106,28 +107,29 @@ Therefore, if the recognition result has been manually changed before, it may ch
|
|||
|
||||
### Shortcut keys
|
||||
|
||||
| Shortcut keys | Description |
|
||||
| ---------------- | ------------------------------------------------ |
|
||||
| Ctrl + shift + A | Automatically label all unchecked images |
|
||||
| Ctrl + shift + R | Re-recognize all the labels of the current image |
|
||||
| W | Create a rect box |
|
||||
| Q | Create a four-points box |
|
||||
| Ctrl + E | Edit label of the selected box |
|
||||
| Ctrl + R | Re-recognize the selected box |
|
||||
| Backspace | Delete the selected box |
|
||||
| Ctrl + V | Check image |
|
||||
| Ctrl + Shift + d | Delete image |
|
||||
| D | Next image |
|
||||
| A | Previous image |
|
||||
| Ctrl++ | Zoom in |
|
||||
| Ctrl-- | Zoom out |
|
||||
| ↑→↓← | Move selected box |
|
||||
| Shortcut keys | Description |
|
||||
| ------------------------ | ------------------------------------------------ |
|
||||
| Ctrl + Shift + R | Re-recognize all the labels of the current image |
|
||||
| W | Create a rect box |
|
||||
| Q | Create a four-points box |
|
||||
| Ctrl + E | Edit label of the selected box |
|
||||
| Ctrl + R | Re-recognize the selected box |
|
||||
| Ctrl + C | Copy and paste the selected box |
|
||||
| Ctrl + Left Mouse Button | Multi select the label box |
|
||||
| Backspace | Delete the selected box |
|
||||
| Ctrl + V | Check image |
|
||||
| Ctrl + Shift + d | Delete image |
|
||||
| D | Next image |
|
||||
| A | Previous image |
|
||||
| Ctrl++ | Zoom in |
|
||||
| Ctrl-- | Zoom out |
|
||||
| ↑→↓← | Move selected box |
|
||||
|
||||
### Built-in Model
|
||||
|
||||
- Default model: PPOCRLabel uses the Chinese and English ultra-lightweight OCR model in PaddleOCR by default, supports Chinese, English and number recognition, and multiple language detection.
|
||||
|
||||
- Model language switching: Changing the built-in model language is supportable by clicking "PaddleOCR"-"Choose OCR Model" in the menu bar. Currently supported languagesinclude French, German, Korean, and Japanese.
|
||||
- Model language switching: Changing the built-in model language is supported by clicking "PaddleOCR"-"Choose OCR Model" in the menu bar. Currently supported languages include French, German, Korean, and Japanese.
|
||||
For specific model download links, please refer to [PaddleOCR Model List](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_en/models_list_en.md#multilingual-recognition-modelupdating)
|
||||
|
||||
- Custom model: A user-trained model can be used instead of the built-in one by modifying the [PaddleOCR class instantiation](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/PPOCRLabel/PPOCRLabel.py#L110) in PPOCRLabel.py, referring to [Custom Model Code](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_en/whl_en.md#use-custom-model)
|
||||
|
@ -136,7 +138,7 @@ Therefore, if the recognition result has been manually changed before, it may ch
|
|||
|
||||
PPOCRLabel supports three ways to save Label.txt
|
||||
|
||||
- Automatically save: When it detects that the user has manually checked 5 pictures, the program automatically writes the annotations into Label.txt. The user can change the value of ``self.autoSaveNum`` in ``PPOCRLabel.py`` to set the number of images to be automatically saved after confirmation.
|
||||
- Automatically save: After selecting "File - Auto Save Label Mode", the program will automatically write the annotations into Label.txt every time the user confirms an image. If this option is not turned on, it will be automatically saved after detecting that the user has manually checked 5 images.
|
||||
- Manual save: Click "File-Save Marking Results" to manually save the label.
|
||||
- Close application save
|
||||
|
||||
|
@ -160,11 +162,11 @@ For some data that are difficult to recognize, the recognition results will not
|
|||
```
|
||||
pyrcc5 -o libs/resources.py resources.qrc
|
||||
```
|
||||
- If you get an error ``` module 'cv2' has no attribute 'INTER_NEAREST'```, you need to delete all opencv related packages first, and then reinstall the headless version of opencv
|
||||
- If you get an error ``` module 'cv2' has no attribute 'INTER_NEAREST'```, you need to delete all opencv related packages first, and then reinstall the 4.2.0.32 version of headless opencv
|
||||
```
|
||||
pip install opencv-contrib-python-headless
|
||||
pip install opencv-contrib-python-headless==4.2.0.32
|
||||
```
|
||||
|
||||
|
||||
### Related
|
||||
|
||||
1.[Tzutalin. LabelImg. Git code (2015)](https://github.com/tzutalin/labelImg)
|
||||
1.[Tzutalin. LabelImg. Git code (2015)](https://github.com/tzutalin/labelImg)
|
|
@ -8,6 +8,10 @@ PPOCRLabel是一款适用于OCR领域的半自动化图形标注工具,内置P
|
|||
|
||||
#### 近期更新
|
||||
|
||||
- 2021.2.5:新增批处理与撤销功能(by [Evezerest](https://github.com/Evezerest))
|
||||
- 批处理功能:按住Ctrl键选择标记框后可批量移动、复制、删除。
|
||||
- 撤销功能:在绘制四点标注框过程中或对框进行编辑操作后,按下Ctrl+Z可撤销上一步操作。
|
||||
- 修复图像旋转和尺寸问题、优化编辑标记框过程(by [ninetailskim](https://github.com/ninetailskim)、 [edencfc](https://github.com/edencfc))
|
||||
- 2021.1.11:优化标注体验(by [edencfc](https://github.com/edencfc)):
|
||||
- 用户可在“视图 - 弹出标记输入框”选择在画完检测框后标记输入框是否弹出。
|
||||
- 识别结果与检测框同步滚动。
|
||||
|
@ -17,9 +21,8 @@ PPOCRLabel是一款适用于OCR领域的半自动化图形标注工具,内置P
|
|||
#### 敬请期待
|
||||
|
||||
- 锁定框模式:针对同一场景数据,被锁定的检测框的大小与位置能在不同图片之间传递。
|
||||
- 体验优化:增加撤销操作,批量移动、复制、删除等功能。优化标注流程。
|
||||
|
||||
如果您对以上内容感兴趣或对完善工具有不一样的想法,欢迎加入我们的队伍与我们共同开发
|
||||
如果您对以上内容感兴趣或对完善工具有不一样的想法,欢迎加入我们的SIG队伍与我们共同开发。可以在[此处](https://github.com/PaddlePaddle/PaddleOCR/issues/1728)完成问卷和前置任务,经过我们确认相关内容后即可正式加入,享受SIG福利,共同为OCR开源事业贡献(特别说明:针对PPOCRLabel的改进也属于PaddleOCR前置任务)
|
||||
|
||||
|
||||
## 安装
|
||||
|
@ -49,7 +52,7 @@ python3 PPOCRLabel.py --lang ch
|
|||
```
|
||||
pip3 install pyqt5
|
||||
pip3 uninstall opencv-python # 由于mac版本的opencv与pyqt有冲突,需先手动卸载opencv
|
||||
pip3 install opencv-contrib-python-headless # 安装headless版本的open-cv
|
||||
pip3 install opencv-contrib-python-headless==4.2.0.32 # 安装headless版本的open-cv
|
||||
cd ./PPOCRLabel # 将目录切换到PPOCRLabel文件夹下
|
||||
python3 PPOCRLabel.py --lang ch
|
||||
```
|
||||
|
@ -65,9 +68,9 @@ python3 PPOCRLabel.py --lang ch
|
|||
5. 标记框绘制完成后,用户点击 “确认”,检测框会先被预分配一个 “待识别” 标签。
|
||||
6. 重新识别:将图片中的所有检测框绘制/调整完成后,点击 “重新识别”,PPOCR模型会对当前图片中的**所有检测框**重新识别<sup>[3]</sup>。
|
||||
7. 内容更改:双击识别结果,对不准确的识别结果进行手动更改。
|
||||
8. 确认标记:点击 “确认”,图片状态切换为 “√”,跳转至下一张(此时不会直接将结果写入文件)。
|
||||
8. **确认标记**:点击 “确认”,图片状态切换为 “√”,跳转至下一张。
|
||||
9. 删除:点击 “删除图像”,图片将会被删除至回收站。
|
||||
10. 保存结果:用户可以通过菜单中“文件-保存标记结果”手动保存,同时程序也会在用户每确认5张图片后自动保存一次。手动确认过的标记将会被存放在所打开图片文件夹下的*Label.txt*中。在菜单栏点击 “文件” - "保存识别结果"后,会将此类图片的识别训练数据保存在*crop_img*文件夹下,识别标签保存在*rec_gt.txt*中<sup>[4]</sup>。
|
||||
10. 保存结果:用户可以通过菜单中“文件-保存标记结果”手动保存,同时也可以点击“文件 - 自动保存标记结果”开启自动保存。手动确认过的标记将会被存放在所打开图片文件夹下的*Label.txt*中。在菜单栏点击 “文件” - "保存识别结果"后,会将此类图片的识别训练数据保存在*crop_img*文件夹下,识别标签保存在*rec_gt.txt*中<sup>[4]</sup>。
|
||||
|
||||
### 注意
|
||||
|
||||
|
@ -93,12 +96,13 @@ python3 PPOCRLabel.py --lang ch
|
|||
|
||||
| 快捷键 | 说明 |
|
||||
| ---------------- | ---------------------------- |
|
||||
| Ctrl + shift + A | 自动标注所有未确认过的图片 |
|
||||
| Ctrl + shift + R | 对当前图片的所有标记重新识别 |
|
||||
| W | 新建矩形框 |
|
||||
| Q | 新建四点框 |
|
||||
| Ctrl + E | 编辑所选框标签 |
|
||||
| Ctrl + R | 重新识别所选标记 |
|
||||
| Ctrl + C | 复制并粘贴选中的标记框 |
|
||||
| Ctrl + 鼠标左键 | 多选标记框 |
|
||||
| Backspace | 删除所选框 |
|
||||
| Ctrl + V | 确认本张图片标记 |
|
||||
| Ctrl + Shift + d | 删除本张图片 |
|
||||
|
@ -120,7 +124,7 @@ python3 PPOCRLabel.py --lang ch
|
|||
|
||||
PPOCRLabel支持三种保存方式:
|
||||
|
||||
- 程序自动保存:当检测到用户手动确认过5张图片后,程序自动将标记结果写入Label.txt中。其中用户可通过更改```PPOCRLabel.py```中的```self.autoSaveNum```的数值设置确认几张图片后进行自动保存。
|
||||
- 自动保存:点击“文件 - 自动保存标记结果”后,用户每确认过一张图片,程序自动将标记结果写入Label.txt中。若未开启此选项,则检测到用户手动确认过5张图片后进行自动保存。
|
||||
- 手动保存:点击“文件 - 保存标记结果”手动保存标记。
|
||||
- 关闭应用程序保存
|
||||
|
||||
|
@ -132,22 +136,22 @@ PPOCRLabel支持三种保存方式:
|
|||
|
||||
### 错误提示
|
||||
- 如果同时使用whl包安装了paddleocr,其优先级大于通过paddleocr.py调用PaddleOCR类,whl包未更新时会导致程序异常。
|
||||
|
||||
|
||||
- PPOCRLabel**不支持对中文文件名**的图片进行自动标注。
|
||||
|
||||
- 针对Linux用户:如果您在打开软件过程中出现**objc[XXXXX]**开头的错误,证明您的opencv版本太高,建议安装4.2版本:
|
||||
```
|
||||
pip install opencv-python==4.2.0.32
|
||||
```
|
||||
|
||||
|
||||
- 如果出现 ```Missing string id``` 开头的错误,需要重新编译资源:
|
||||
```
|
||||
pyrcc5 -o libs/resources.py resources.qrc
|
||||
```
|
||||
|
||||
- 如果出现``` module 'cv2' has no attribute 'INTER_NEAREST'```错误,需要首先删除所有opencv相关包,然后重新安装headless版本的opencv
|
||||
|
||||
- 如果出现``` module 'cv2' has no attribute 'INTER_NEAREST'```错误,需要首先删除所有opencv相关包,然后重新安装4.2.0.32版本的headless opencv
|
||||
```
|
||||
pip install opencv-contrib-python-headless
|
||||
pip install opencv-contrib-python-headless==4.2.0.32
|
||||
```
|
||||
|
||||
### 参考资料
|
||||
|
|
|
@ -37,7 +37,8 @@ class Canvas(QWidget):
|
|||
zoomRequest = pyqtSignal(int)
|
||||
scrollRequest = pyqtSignal(int, int)
|
||||
newShape = pyqtSignal()
|
||||
selectionChanged = pyqtSignal(bool)
|
||||
# selectionChanged = pyqtSignal(bool)
|
||||
selectionChanged = pyqtSignal(list)
|
||||
shapeMoved = pyqtSignal()
|
||||
drawingPolygon = pyqtSignal(bool)
|
||||
|
||||
|
@ -51,9 +52,11 @@ class Canvas(QWidget):
|
|||
# Initialise local state.
|
||||
self.mode = self.EDIT
|
||||
self.shapes = []
|
||||
self.shapesBackups = []
|
||||
self.current = None
|
||||
self.selectedShapes = []
|
||||
self.selectedShape = None # save the selected shape here
|
||||
self.selectedShapeCopy = None
|
||||
self.selectedShapesCopy = []
|
||||
self.drawingLineColor = QColor(0, 0, 255)
|
||||
self.drawingRectColor = QColor(0, 0, 255)
|
||||
self.line = Shape(line_color=self.drawingLineColor)
|
||||
|
@ -77,6 +80,7 @@ class Canvas(QWidget):
|
|||
self.drawSquare = False
|
||||
self.fourpoint = True # ADD
|
||||
self.pointnum = 0
|
||||
self.movingShape = False
|
||||
|
||||
#initialisation for panning
|
||||
self.pan_initial_pos = QPoint()
|
||||
|
@ -149,37 +153,20 @@ class Canvas(QWidget):
|
|||
clipped_x = min(max(0, pos.x()), size.width())
|
||||
clipped_y = min(max(0, pos.y()), size.height())
|
||||
pos = QPointF(clipped_x, clipped_y)
|
||||
elif len(self.current) > 1 and self.closeEnough(pos, self.current[0]) and not self.fourpoint:
|
||||
|
||||
elif len(self.current) > 1 and self.closeEnough(pos, self.current[0]):
|
||||
# Attract line to starting point and colorise to alert the
|
||||
# user:
|
||||
pos = self.current[0]
|
||||
color = self.current.line_color
|
||||
self.overrideCursor(CURSOR_POINT)
|
||||
self.current.highlightVertex(0, Shape.NEAR_VERTEX)
|
||||
elif ( # ADD
|
||||
len(self.current) > 1
|
||||
and self.fourpoint
|
||||
and self.closeEnough(pos, self.current[0])
|
||||
):
|
||||
# Attract line to starting point and
|
||||
# colorise to alert the user.
|
||||
pos = self.current[0]
|
||||
self.overrideCursor(CURSOR_POINT)
|
||||
self.current.highlightVertex(0, Shape.NEAR_VERTEX)
|
||||
|
||||
|
||||
if self.drawSquare:
|
||||
initPos = self.current[0]
|
||||
minX = initPos.x()
|
||||
minY = initPos.y()
|
||||
min_size = min(abs(pos.x() - minX), abs(pos.y() - minY))
|
||||
directionX = -1 if pos.x() - minX < 0 else 1
|
||||
directionY = -1 if pos.y() - minY < 0 else 1
|
||||
self.line[1] = QPointF(minX + directionX * min_size, minY + directionY * min_size)
|
||||
self.line.points = [self.current[0], pos]
|
||||
self.line.close()
|
||||
|
||||
elif self.fourpoint:
|
||||
# self.line[self.pointnum] = pos # OLD
|
||||
|
||||
self.line[0] = self.current[-1]
|
||||
self.line[1] = pos
|
||||
|
||||
|
@ -196,12 +183,14 @@ class Canvas(QWidget):
|
|||
|
||||
# Polygon copy moving.
|
||||
if Qt.RightButton & ev.buttons():
|
||||
if self.selectedShapeCopy and self.prevPoint:
|
||||
if self.selectedShapesCopy and self.prevPoint:
|
||||
self.overrideCursor(CURSOR_MOVE)
|
||||
self.boundedMoveShape(self.selectedShapeCopy, pos)
|
||||
self.boundedMoveShape(self.selectedShapesCopy, pos)
|
||||
self.repaint()
|
||||
elif self.selectedShape:
|
||||
self.selectedShapeCopy = self.selectedShape.copy()
|
||||
elif self.selectedShapes:
|
||||
self.selectedShapesCopy = [
|
||||
s.copy() for s in self.selectedShapes
|
||||
]
|
||||
self.repaint()
|
||||
return
|
||||
|
||||
|
@ -211,11 +200,13 @@ class Canvas(QWidget):
|
|||
self.boundedMoveVertex(pos)
|
||||
self.shapeMoved.emit()
|
||||
self.repaint()
|
||||
elif self.selectedShape and self.prevPoint:
|
||||
self.movingShape = True
|
||||
elif self.selectedShapes and self.prevPoint:
|
||||
self.overrideCursor(CURSOR_MOVE)
|
||||
self.boundedMoveShape(self.selectedShape, pos)
|
||||
self.boundedMoveShape(self.selectedShapes, pos)
|
||||
self.shapeMoved.emit()
|
||||
self.repaint()
|
||||
self.movingShape = True
|
||||
else:
|
||||
#pan
|
||||
delta_x = pos.x() - self.pan_initial_pos.x()
|
||||
|
@ -263,65 +254,60 @@ class Canvas(QWidget):
|
|||
|
||||
def mousePressEvent(self, ev):
|
||||
pos = self.transformPos(ev.pos())
|
||||
|
||||
if ev.button() == Qt.LeftButton:
|
||||
if self.drawing():
|
||||
# self.handleDrawing(pos) # OLD
|
||||
|
||||
|
||||
if self.current and self.fourpoint: # ADD IF
|
||||
# Add point to existing shape.
|
||||
print('Adding points in mousePressEvent is ', self.line[1])
|
||||
self.current.addPoint(self.line[1])
|
||||
self.line[0] = self.current[-1]
|
||||
if self.current.isClosed():
|
||||
# print('1111')
|
||||
if self.current:
|
||||
if self.fourpoint: # ADD IF
|
||||
# Add point to existing shape.
|
||||
# print('Adding points in mousePressEvent is ', self.line[1])
|
||||
self.current.addPoint(self.line[1])
|
||||
self.line[0] = self.current[-1]
|
||||
if self.current.isClosed():
|
||||
# print('1111')
|
||||
self.finalise()
|
||||
elif self.drawSquare: # 增加
|
||||
assert len(self.current.points) == 1
|
||||
self.current.points = self.line.points
|
||||
self.finalise()
|
||||
elif not self.outOfPixmap(pos):
|
||||
# Create new shape.
|
||||
self.current = Shape()# self.current = Shape(shape_type=self.createMode)
|
||||
self.current = Shape()
|
||||
self.current.addPoint(pos)
|
||||
# if self.createMode == "point":
|
||||
# self.finalise()
|
||||
# else:
|
||||
# if self.createMode == "circle":
|
||||
# self.current.shape_type = "circle"
|
||||
self.line.points = [pos, pos]
|
||||
self.setHiding()
|
||||
self.drawingPolygon.emit(True)
|
||||
self.update()
|
||||
|
||||
|
||||
else:
|
||||
selection = self.selectShapePoint(pos)
|
||||
group_mode = int(ev.modifiers()) == Qt.ControlModifier
|
||||
self.selectShapePoint(pos, multiple_selection_mode=group_mode)
|
||||
self.prevPoint = pos
|
||||
|
||||
if selection is None:
|
||||
#pan
|
||||
QApplication.setOverrideCursor(QCursor(Qt.OpenHandCursor))
|
||||
self.pan_initial_pos = pos
|
||||
self.pan_initial_pos = pos
|
||||
|
||||
elif ev.button() == Qt.RightButton and self.editing():
|
||||
self.selectShapePoint(pos)
|
||||
group_mode = int(ev.modifiers()) == Qt.ControlModifier
|
||||
self.selectShapePoint(pos, multiple_selection_mode=group_mode)
|
||||
self.prevPoint = pos
|
||||
self.update()
|
||||
|
||||
def mouseReleaseEvent(self, ev):
|
||||
if ev.button() == Qt.RightButton:
|
||||
menu = self.menus[bool(self.selectedShapeCopy)]
|
||||
menu = self.menus[bool(self.selectedShapesCopy)]
|
||||
self.restoreCursor()
|
||||
if not menu.exec_(self.mapToGlobal(ev.pos()))\
|
||||
and self.selectedShapeCopy:
|
||||
and self.selectedShapesCopy:
|
||||
# Cancel the move by deleting the shadow copy.
|
||||
self.selectedShapeCopy = None
|
||||
# self.selectedShapeCopy = None
|
||||
self.selectedShapesCopy = []
|
||||
self.repaint()
|
||||
elif ev.button() == Qt.LeftButton and self.selectedShape: # OLD
|
||||
|
||||
elif ev.button() == Qt.LeftButton and self.selectedShapes:
|
||||
if self.selectedVertex():
|
||||
self.overrideCursor(CURSOR_POINT)
|
||||
else:
|
||||
self.overrideCursor(CURSOR_GRAB)
|
||||
|
||||
|
||||
elif ev.button() == Qt.LeftButton and not self.fourpoint:
|
||||
pos = self.transformPos(ev.pos())
|
||||
if self.drawing():
|
||||
|
@ -330,24 +316,37 @@ class Canvas(QWidget):
|
|||
#pan
|
||||
QApplication.restoreOverrideCursor() # ?
|
||||
|
||||
if self.movingShape and self.hShape:
|
||||
index = self.shapes.index(self.hShape)
|
||||
if (
|
||||
self.shapesBackups[-1][index].points
|
||||
!= self.shapes[index].points
|
||||
):
|
||||
self.storeShapes()
|
||||
self.shapeMoved.emit() # connect to updateBoxlist in PPOCRLabel.py
|
||||
|
||||
self.movingShape = False
|
||||
|
||||
|
||||
def endMove(self, copy=False):
|
||||
assert self.selectedShape and self.selectedShapeCopy
|
||||
shape = self.selectedShapeCopy
|
||||
#del shape.fill_color
|
||||
#del shape.line_color
|
||||
assert self.selectedShapes and self.selectedShapesCopy
|
||||
assert len(self.selectedShapesCopy) == len(self.selectedShapes)
|
||||
if copy:
|
||||
self.shapes.append(shape)
|
||||
self.selectedShape.selected = False
|
||||
self.selectedShape = shape
|
||||
self.repaint()
|
||||
for i, shape in enumerate(self.selectedShapesCopy):
|
||||
self.shapes.append(shape)
|
||||
self.selectedShapes[i].selected = False
|
||||
self.selectedShapes[i] = shape
|
||||
else:
|
||||
self.selectedShape.points = [p for p in shape.points]
|
||||
self.selectedShapeCopy = None
|
||||
for i, shape in enumerate(self.selectedShapesCopy):
|
||||
self.selectedShapes[i].points = shape.points
|
||||
self.selectedShapesCopy = []
|
||||
self.repaint()
|
||||
self.storeShapes()
|
||||
return True
|
||||
|
||||
def hideBackroundShapes(self, value):
|
||||
self.hideBackround = value
|
||||
if self.selectedShape:
|
||||
if self.selectedShapes:
|
||||
# Only hide other shapes if there is a current selection.
|
||||
# Otherwise the user will not be able to select a shape.
|
||||
self.setHiding(True)
|
||||
|
@ -363,7 +362,7 @@ class Canvas(QWidget):
|
|||
if self.pointnum == 3:
|
||||
self.finalise()
|
||||
|
||||
else: # 按住送掉后跳到这里
|
||||
else:
|
||||
initPos = self.current[0]
|
||||
print('initPos', self.current[0])
|
||||
minX = initPos.x()
|
||||
|
@ -399,28 +398,33 @@ class Canvas(QWidget):
|
|||
self.current.popPoint()
|
||||
self.finalise()
|
||||
|
||||
def selectShape(self, shape):
|
||||
self.deSelectShape()
|
||||
shape.selected = True
|
||||
self.selectedShape = shape
|
||||
def selectShapes(self, shapes):
|
||||
for s in shapes: s.seleted = True
|
||||
self.setHiding()
|
||||
self.selectionChanged.emit(True)
|
||||
self.selectionChanged.emit(shapes)
|
||||
self.update()
|
||||
|
||||
def selectShapePoint(self, point):
|
||||
|
||||
def selectShapePoint(self, point, multiple_selection_mode):
|
||||
"""Select the first shape created which contains this point."""
|
||||
self.deSelectShape()
|
||||
if self.selectedVertex(): # A vertex is marked for selection.
|
||||
index, shape = self.hVertex, self.hShape
|
||||
shape.highlightVertex(index, shape.MOVE_VERTEX)
|
||||
self.selectShape(shape)
|
||||
return self.hVertex
|
||||
for shape in reversed(self.shapes):
|
||||
if self.isVisible(shape) and shape.containsPoint(point):
|
||||
self.selectShape(shape)
|
||||
self.calculateOffsets(shape, point)
|
||||
return self.selectedShape
|
||||
return None
|
||||
else:
|
||||
for shape in reversed(self.shapes):
|
||||
if self.isVisible(shape) and shape.containsPoint(point):
|
||||
self.calculateOffsets(shape, point)
|
||||
self.setHiding()
|
||||
if multiple_selection_mode:
|
||||
if shape not in self.selectedShapes: # list
|
||||
self.selectionChanged.emit(
|
||||
self.selectedShapes + [shape]
|
||||
)
|
||||
else:
|
||||
self.selectionChanged.emit([shape])
|
||||
return
|
||||
self.deSelectShape()
|
||||
|
||||
def calculateOffsets(self, shape, point):
|
||||
rect = shape.boundingRect()
|
||||
|
@ -465,22 +469,28 @@ class Canvas(QWidget):
|
|||
else:
|
||||
shiftPos = pos - point
|
||||
|
||||
shape.moveVertexBy(index, shiftPos)
|
||||
if [shape[0].x(), shape[0].y(), shape[2].x(), shape[2].y()] \
|
||||
== [shape[3].x(),shape[1].y(),shape[1].x(),shape[3].y()]:
|
||||
shape.moveVertexBy(index, shiftPos)
|
||||
lindex = (index + 1) % 4
|
||||
rindex = (index + 3) % 4
|
||||
lshift = None
|
||||
rshift = None
|
||||
if index % 2 == 0:
|
||||
rshift = QPointF(shiftPos.x(), 0)
|
||||
lshift = QPointF(0, shiftPos.y())
|
||||
else:
|
||||
lshift = QPointF(shiftPos.x(), 0)
|
||||
rshift = QPointF(0, shiftPos.y())
|
||||
shape.moveVertexBy(rindex, rshift)
|
||||
shape.moveVertexBy(lindex, lshift)
|
||||
|
||||
lindex = (index + 1) % 4
|
||||
rindex = (index + 3) % 4
|
||||
lshift = None
|
||||
rshift = None
|
||||
if index % 2 == 0:
|
||||
rshift = QPointF(shiftPos.x(), 0)
|
||||
lshift = QPointF(0, shiftPos.y())
|
||||
else:
|
||||
lshift = QPointF(shiftPos.x(), 0)
|
||||
rshift = QPointF(0, shiftPos.y())
|
||||
shape.moveVertexBy(rindex, rshift)
|
||||
shape.moveVertexBy(lindex, lshift)
|
||||
shape.moveVertexBy(index, shiftPos)
|
||||
|
||||
def boundedMoveShape(self, shape, pos):
|
||||
|
||||
def boundedMoveShape(self, shapes, pos):
|
||||
if type(shapes).__name__ != 'list': shapes = [shapes]
|
||||
if self.outOfPixmap(pos):
|
||||
return False # No need to move
|
||||
o1 = pos + self.offsets[0]
|
||||
|
@ -497,46 +507,55 @@ class Canvas(QWidget):
|
|||
#self.calculateOffsets(self.selectedShape, pos)
|
||||
dp = pos - self.prevPoint
|
||||
if dp:
|
||||
shape.moveBy(dp)
|
||||
for shape in shapes:
|
||||
shape.moveBy(dp)
|
||||
self.prevPoint = pos
|
||||
return True
|
||||
return False
|
||||
|
||||
def deSelectShape(self):
|
||||
if self.selectedShape:
|
||||
self.selectedShape.selected = False
|
||||
self.selectedShape = None
|
||||
if self.selectedShapes:
|
||||
for shape in self.selectedShapes: shape.selected=False
|
||||
self.setHiding(False)
|
||||
self.selectionChanged.emit(False)
|
||||
self.selectionChanged.emit([])
|
||||
self.update()
|
||||
|
||||
def deleteSelected(self):
|
||||
if self.selectedShape:
|
||||
shape = self.selectedShape
|
||||
self.shapes.remove(self.selectedShape)
|
||||
self.selectedShape = None
|
||||
deleted_shapes = []
|
||||
if self.selectedShapes:
|
||||
for shape in self.selectedShapes:
|
||||
self.shapes.remove(shape)
|
||||
deleted_shapes.append(shape)
|
||||
self.storeShapes()
|
||||
self.selectedShapes = []
|
||||
self.update()
|
||||
return shape
|
||||
return deleted_shapes
|
||||
|
||||
def storeShapes(self):
|
||||
shapesBackup = []
|
||||
for shape in self.shapes:
|
||||
shapesBackup.append(shape.copy())
|
||||
if len(self.shapesBackups) >= 10:
|
||||
self.shapesBackups = self.shapesBackups[-9:]
|
||||
self.shapesBackups.append(shapesBackup)
|
||||
|
||||
def copySelectedShape(self):
|
||||
if self.selectedShape:
|
||||
shape = self.selectedShape.copy()
|
||||
self.deSelectShape()
|
||||
self.shapes.append(shape)
|
||||
shape.selected = True
|
||||
self.selectedShape = shape
|
||||
self.boundedShiftShape(shape)
|
||||
return shape
|
||||
if self.selectedShapes:
|
||||
self.selectedShapesCopy = [s.copy() for s in self.selectedShapes]
|
||||
self.boundedShiftShapes(self.selectedShapesCopy)
|
||||
self.endMove(copy=True)
|
||||
return self.selectedShapes
|
||||
|
||||
def boundedShiftShape(self, shape):
|
||||
def boundedShiftShapes(self, shapes):
|
||||
# Try to move in one direction, and if it fails in another.
|
||||
# Give up if both fail.
|
||||
point = shape[0]
|
||||
offset = QPointF(2.0, 2.0)
|
||||
self.calculateOffsets(shape, point)
|
||||
self.prevPoint = point
|
||||
if not self.boundedMoveShape(shape, point - offset):
|
||||
self.boundedMoveShape(shape, point + offset)
|
||||
for shape in shapes:
|
||||
point = shape[0]
|
||||
offset = QPointF(2.0, 2.0)
|
||||
self.calculateOffsets(shape, point)
|
||||
self.prevPoint = point
|
||||
if not self.boundedMoveShape(shape, point - offset):
|
||||
self.boundedMoveShape(shape, point + offset)
|
||||
|
||||
def paintEvent(self, event):
|
||||
if not self.pixmap:
|
||||
|
@ -560,8 +579,9 @@ class Canvas(QWidget):
|
|||
if self.current:
|
||||
self.current.paint(p)
|
||||
self.line.paint(p)
|
||||
if self.selectedShapeCopy:
|
||||
self.selectedShapeCopy.paint(p)
|
||||
if self.selectedShapesCopy:
|
||||
for s in self.selectedShapesCopy:
|
||||
s.paint(p)
|
||||
|
||||
# Paint rect
|
||||
if self.current is not None and len(self.line) == 2 and not self.fourpoint:
|
||||
|
@ -690,13 +710,13 @@ class Canvas(QWidget):
|
|||
elif key == Qt.Key_Return and self.canCloseShape():
|
||||
self.finalise()
|
||||
elif key == Qt.Key_Left and self.selectedShape:
|
||||
self.moveOnePixel('Left')
|
||||
self.moveOnePixel('Left')
|
||||
elif key == Qt.Key_Right and self.selectedShape:
|
||||
self.moveOnePixel('Right')
|
||||
self.moveOnePixel('Right')
|
||||
elif key == Qt.Key_Up and self.selectedShape:
|
||||
self.moveOnePixel('Up')
|
||||
self.moveOnePixel('Up')
|
||||
elif key == Qt.Key_Down and self.selectedShape:
|
||||
self.moveOnePixel('Down')
|
||||
self.moveOnePixel('Down')
|
||||
|
||||
def moveOnePixel(self, direction):
|
||||
# print(self.selectedShape.points)
|
||||
|
@ -739,6 +759,7 @@ class Canvas(QWidget):
|
|||
|
||||
if fill_color:
|
||||
self.shapes[-1].fill_color = fill_color
|
||||
self.storeShapes()
|
||||
|
||||
return self.shapes[-1]
|
||||
|
||||
|
@ -749,6 +770,17 @@ class Canvas(QWidget):
|
|||
self.line.points = [self.current[-1], self.current[0]]
|
||||
self.drawingPolygon.emit(True)
|
||||
|
||||
def undoLastPoint(self):
|
||||
if not self.current or self.current.isClosed():
|
||||
return
|
||||
self.current.popPoint()
|
||||
if len(self.current) > 0:
|
||||
self.line[0] = self.current[-1]
|
||||
else:
|
||||
self.current = None
|
||||
self.drawingPolygon.emit(False)
|
||||
self.repaint()
|
||||
|
||||
def resetAllLines(self):
|
||||
assert self.shapes
|
||||
self.current = self.shapes.pop()
|
||||
|
@ -762,11 +794,18 @@ class Canvas(QWidget):
|
|||
def loadPixmap(self, pixmap):
|
||||
self.pixmap = pixmap
|
||||
self.shapes = []
|
||||
self.repaint() # 这函数在哪
|
||||
self.repaint()
|
||||
|
||||
def loadShapes(self, shapes):
|
||||
self.shapes = list(shapes)
|
||||
def loadShapes(self, shapes, replace=True):
|
||||
if replace:
|
||||
self.shapes = list(shapes)
|
||||
else:
|
||||
self.shapes.extend(shapes)
|
||||
self.current = None
|
||||
self.hShape = None
|
||||
self.hVertex = None
|
||||
# self.hEdge = None
|
||||
self.storeShapes()
|
||||
self.repaint()
|
||||
|
||||
def setShapeVisible(self, shape, value):
|
||||
|
@ -793,6 +832,24 @@ class Canvas(QWidget):
|
|||
self.restoreCursor()
|
||||
self.pixmap = None
|
||||
self.update()
|
||||
self.shapesBackups = []
|
||||
|
||||
def setDrawingShapeToSquare(self, status):
|
||||
self.drawSquare = status
|
||||
|
||||
def restoreShape(self):
|
||||
if not self.isShapeRestorable:
|
||||
return
|
||||
self.shapesBackups.pop() # latest
|
||||
shapesBackup = self.shapesBackups.pop()
|
||||
self.shapes = shapesBackup
|
||||
self.selectedShapes = []
|
||||
for shape in self.shapes:
|
||||
shape.selected = False
|
||||
self.repaint()
|
||||
|
||||
@property
|
||||
def isShapeRestorable(self):
|
||||
if len(self.shapesBackups) < 2:
|
||||
return False
|
||||
return True
|
File diff suppressed because it is too large
Load Diff
|
@ -82,7 +82,7 @@ class Shape(object):
|
|||
return False
|
||||
|
||||
def addPoint(self, point):
|
||||
if not self.reachMaxPoints():
|
||||
if not self.reachMaxPoints(): # 4个点时发出close信号
|
||||
self.points.append(point)
|
||||
|
||||
def popPoint(self):
|
||||
|
|
|
@ -96,4 +96,7 @@ hideBox=隐藏所有标注
|
|||
showBox=显示所有标注
|
||||
saveLabel=保存标记结果
|
||||
singleRe=重识别此区块
|
||||
labelDialogOption=弹出标记输入框
|
||||
labelDialogOption=弹出标记输入框
|
||||
undo=撤销
|
||||
undoLastPoint=撤销上个点
|
||||
autoSaveMode=自动保存标记结果
|
|
@ -96,4 +96,7 @@ hideBox=Hide All Box
|
|||
showBox=Show All Box
|
||||
saveLabel=Save Label
|
||||
singleRe=Re-recognition RectBox
|
||||
labelDialogOption=Pop-up Label Input Dialog
|
||||
labelDialogOption=Pop-up Label Input Dialog
|
||||
undo=Undo
|
||||
undoLastPoint=Undo Last Point
|
||||
autoSaveMode=Auto Save Label Mode
|
|
@ -5,10 +5,11 @@ PaddleOCR aims to create multilingual, awesome, leading, and practical OCR tools
|
|||
|
||||
## Notice
|
||||
PaddleOCR supports both dynamic graph and static graph programming paradigm
|
||||
- Dynamic graph: dygraph branch (default), **supported by paddle 2.0rc1+ ([installation](./doc/doc_en/installation_en.md))**
|
||||
- Dynamic graph: dygraph branch (default), **supported by paddle 2.0.0 ([installation](./doc/doc_en/installation_en.md))**
|
||||
- Static graph: develop branch
|
||||
|
||||
**Recent updates**
|
||||
- 2021.1.21 Updated 25+ multilingual recognition models ([models list](./doc/doc_en/models_list_en.md)), including English, Chinese, German, French, Japanese, Spanish, Portuguese, Russian, Arabic and so on. Models for more languages will continue to be added (see the [Develop Plan](https://github.com/PaddlePaddle/PaddleOCR/issues/1048)).
|
||||
- 2020.12.15 Added a data synthesis tool, [Style-Text](./StyleText/README.md), which makes it easy to synthesize a large number of images similar to the target scene image.
|
||||
- 2020.11.25 Update a new data annotation tool, i.e., [PPOCRLabel](./PPOCRLabel/README.md), which is helpful to improve the labeling efficiency. Moreover, the labeling results can be used in training of the PP-OCR system directly.
|
||||
- 2020.9.22 Update the PP-OCR technical article, https://arxiv.org/abs/2009.09941
|
||||
|
@ -41,7 +42,7 @@ The above pictures are the visualizations of the general ppocr_server model. For
|
|||
- Scan the QR code below with WeChat to join the official technical exchange group. We look forward to your participation.
|
||||
|
||||
<div align="center">
|
||||
<img src="./doc/joinus.PNG" width = "200" height = "200" />
|
||||
<img src="https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.0/doc/joinus.PNG" width = "200" height = "200" />
|
||||
</div>
|
||||
|
||||
|
||||
|
|
|
@ -4,11 +4,13 @@
|
|||
PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力使用者训练出更好的模型,并应用落地。
|
||||
## 注意
|
||||
PaddleOCR同时支持动态图与静态图两种编程范式
|
||||
- 动态图版本:dygraph分支(默认),需将paddle版本升级至2.0rc1+([快速安装](./doc/doc_ch/installation.md))
|
||||
- 动态图版本:dygraph分支(默认),需将paddle版本升级至2.0.0([快速安装](./doc/doc_ch/installation.md))
|
||||
- 静态图版本:develop分支
|
||||
|
||||
**近期更新**
|
||||
- 2021.1.18 [FAQ](./doc/doc_ch/FAQ.md)新增5个高频问题,总数152个,每周一都会更新,欢迎大家持续关注。
|
||||
- 2021.2.1 [FAQ](./doc/doc_ch/FAQ.md)新增5个高频问题,总数162个,每周一都会更新,欢迎大家持续关注。
|
||||
- 2021.1.26,28,29 PaddleOCR官方研发团队带来技术深入解读三日直播课,1月26日、28日、29日晚上19:30,[直播地址](https://live.bilibili.com/21689802)
|
||||
- 2021.1.21 更新多语言识别模型,目前支持语种超过27种,[多语言模型下载](./doc/doc_ch/models_list.md),包括中文简体、中文繁体、英文、法文、德文、韩文、日文、意大利文、西班牙文、葡萄牙文、俄罗斯文、阿拉伯文等,后续计划可以参考[多语言研发计划](https://github.com/PaddlePaddle/PaddleOCR/issues/1048)
|
||||
- 2020.12.15 更新数据合成工具[Style-Text](./StyleText/README_ch.md),可以批量合成大量与目标场景类似的图像,在多个场景验证,效果明显提升。
|
||||
- 2020.11.25 更新半自动标注工具[PPOCRLabel](./PPOCRLabel/README_ch.md),辅助开发者高效完成标注任务,输出格式与PP-OCR训练任务完美衔接。
|
||||
- 2020.9.22 更新PP-OCR技术文章,https://arxiv.org/abs/2009.09941
|
||||
|
@ -44,7 +46,7 @@ PaddleOCR同时支持动态图与静态图两种编程范式
|
|||
- 微信扫描二维码加入官方交流群,获得更高效的问题答疑,与各行各业开发者充分交流,期待您的加入。
|
||||
|
||||
<div align="center">
|
||||
<img src="./doc/joinus.PNG" width = "200" height = "200" />
|
||||
<img src="https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.0/doc/joinus.PNG" width = "200" height = "200" />
|
||||
</div>
|
||||
|
||||
## 快速体验
|
||||
|
|
|
@ -72,7 +72,7 @@ fusion_generator:
|
|||
python3 tools/synth_image.py -c configs/config.yml --style_image examples/style_images/2.jpg --text_corpus PaddleOCR --language en
|
||||
```
|
||||
|
||||
* Note 1: The language options is correspond to the corpus. Currently, the tool only supports English, Simplified Chinese and Korean.
|
||||
* Note 1: The language option corresponds to the corpus. Currently, the tool only supports English (en), Simplified Chinese (ch) and Korean (ko).
|
||||
* Note 2: Synth-Text is mainly used to generate images for OCR recognition models.
|
||||
So the height of style images should be around 32 pixels. Images in other sizes may behave poorly.
|
||||
* Note 3: You can modify `use_gpu` in `configs/config.yml` to determine whether to use GPU for prediction.
|
||||
|
@ -120,7 +120,7 @@ In actual application scenarios, it is often necessary to synthesize pictures in
|
|||
* `with_label`:Whether the `label_file` is label file list.
|
||||
* `CorpusGenerator`:
|
||||
* `method`:Method of CorpusGenerator; supports `FileCorpus` and `EnNumCorpus`. If `EnNumCorpus` is used, no other configuration is needed; otherwise you need to set `corpus_file` and `language`.
|
||||
* `language`:Language of the corpus.
|
||||
* `language`:Language of the corpus. Currently, the tool only supports English(en), Simplified Chinese(ch) and Korean(ko).
|
||||
* `corpus_file`: Filepath of the corpus. Corpus file should be a text file which will be split by line-endings('\n'). Corpus generator samples one line each time.
|
||||
|
||||
|
||||
|
|
|
@ -63,10 +63,10 @@ fusion_generator:
|
|||
```python
|
||||
python3 tools/synth_image.py -c configs/config.yml --style_image examples/style_images/2.jpg --text_corpus PaddleOCR --language en
|
||||
```
|
||||
* 注1:语言选项和语料相对应,目前该工具只支持英文、简体中文和韩语。
|
||||
* 注1:语言选项和语料相对应,目前支持英文(en)、简体中文(ch)和韩语(ko)。
|
||||
* 注2:Style-Text生成的数据主要应用于OCR识别场景。基于当前PaddleOCR识别模型的设计,我们主要支持高度在32左右的风格图像。
|
||||
如果输入图像尺寸相差过多,效果可能不佳。
|
||||
* 注3:可以通过修改配置文件中的`use_gpu`(true或者false)参数来决定是否使用GPU进行预测。
|
||||
* 注3:可以通过修改配置文件`configs/config.yml`中的`use_gpu`(true或者false)参数来决定是否使用GPU进行预测。
|
||||
|
||||
|
||||
例如,输入如下图片和语料"PaddleOCR":
|
||||
|
@ -105,7 +105,7 @@ python3 tools/synth_image.py -c configs/config.yml --style_image examples/style_
|
|||
* `with_label`:标志`label_file`是否为label文件。
|
||||
* `CorpusGenerator`:
|
||||
* `method`:语料生成方法,目前有`FileCorpus`和`EnNumCorpus`可选。如果使用`EnNumCorpus`,则不需要填写其他配置,否则需要修改`corpus_file`和`language`;
|
||||
* `language`:语料的语种;
|
||||
* `language`:语料的语种,目前支持英文(en)、简体中文(ch)和韩语(ko);
|
||||
* `corpus_file`: 语料文件路径。语料文件应使用文本文件。语料生成器首先会将语料按行切分,之后每次随机选取一行。
|
||||
|
||||
语料文件格式示例:
|
||||
|
|
|
@ -38,7 +38,15 @@ class StyleTextRecPredictor(object):
|
|||
self.std = config["Predictor"]["std"]
|
||||
self.expand_result = config["Predictor"]["expand_result"]
|
||||
|
||||
def predict(self, style_input, text_input):
|
||||
def reshape_to_same_height(self, img_list):
|
||||
h = img_list[0].shape[0]
|
||||
for idx in range(1, len(img_list)):
|
||||
new_w = round(1.0 * img_list[idx].shape[1] /
|
||||
img_list[idx].shape[0] * h)
|
||||
img_list[idx] = cv2.resize(img_list[idx], (new_w, h))
|
||||
return img_list
|
||||
|
||||
def predict_single_image(self, style_input, text_input):
|
||||
style_input = self.rep_style_input(style_input, text_input)
|
||||
tensor_style_input = self.preprocess(style_input)
|
||||
tensor_text_input = self.preprocess(text_input)
|
||||
|
@ -64,6 +72,21 @@ class StyleTextRecPredictor(object):
|
|||
"fake_bg": fake_bg,
|
||||
}
|
||||
|
||||
def predict(self, style_input, text_input_list):
|
||||
if not isinstance(text_input_list, (tuple, list)):
|
||||
return self.predict_single_image(style_input, text_input_list)
|
||||
|
||||
synth_result_list = []
|
||||
for text_input in text_input_list:
|
||||
synth_result = self.predict_single_image(style_input, text_input)
|
||||
synth_result_list.append(synth_result)
|
||||
|
||||
for key in synth_result:
|
||||
res = [r[key] for r in synth_result_list]
|
||||
res = self.reshape_to_same_height(res)
|
||||
synth_result[key] = np.concatenate(res, axis=1)
|
||||
return synth_result
|
||||
|
||||
def preprocess(self, img):
|
||||
img = (img.astype('float32') * self.scale - self.mean) / self.std
|
||||
img_height, img_width, channel = img.shape
|
||||
|
|
|
@ -12,6 +12,8 @@
|
|||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import os
|
||||
import numpy as np
|
||||
import cv2
|
||||
|
||||
from utils.config import ArgsParser, load_config, override_config
|
||||
from utils.logging import get_logger
|
||||
|
@ -36,8 +38,9 @@ class ImageSynthesiser(object):
|
|||
self.predictor = getattr(predictors, predictor_method)(self.config)
|
||||
|
||||
def synth_image(self, corpus, style_input, language="en"):
|
||||
corpus, text_input = self.text_drawer.draw_text(corpus, language)
|
||||
synth_result = self.predictor.predict(style_input, text_input)
|
||||
corpus_list, text_input_list = self.text_drawer.draw_text(
|
||||
corpus, language, style_input_width=style_input.shape[1])
|
||||
synth_result = self.predictor.predict(style_input, text_input_list)
|
||||
return synth_result
|
||||
|
||||
|
||||
|
@ -59,12 +62,15 @@ class DatasetSynthesiser(ImageSynthesiser):
|
|||
for i in range(self.output_num):
|
||||
style_data = self.style_sampler.sample()
|
||||
style_input = style_data["image"]
|
||||
corpus_language, text_input_label = self.corpus_generator.generate(
|
||||
)
|
||||
text_input_label, text_input = self.text_drawer.draw_text(
|
||||
text_input_label, corpus_language)
|
||||
corpus_language, text_input_label = self.corpus_generator.generate()
|
||||
text_input_label_list, text_input_list = self.text_drawer.draw_text(
|
||||
text_input_label,
|
||||
corpus_language,
|
||||
style_input_width=style_input.shape[1])
|
||||
|
||||
synth_result = self.predictor.predict(style_input, text_input)
|
||||
text_input_label = "".join(text_input_label_list)
|
||||
|
||||
synth_result = self.predictor.predict(style_input, text_input_list)
|
||||
fake_fusion = synth_result["fake_fusion"]
|
||||
self.writer.save_image(fake_fusion, text_input_label)
|
||||
self.writer.save_label()
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
from PIL import Image, ImageDraw, ImageFont
|
||||
import numpy as np
|
||||
import cv2
|
||||
from utils.logging import get_logger
|
||||
|
||||
|
||||
|
@ -28,7 +29,11 @@ class StdTextDrawer(object):
|
|||
else:
|
||||
return int((self.height - 4)**2 / font_height)
|
||||
|
||||
def draw_text(self, corpus, language="en", crop=True):
|
||||
def draw_text(self,
|
||||
corpus,
|
||||
language="en",
|
||||
crop=True,
|
||||
style_input_width=None):
|
||||
if language not in self.support_languages:
|
||||
self.logger.warning(
|
||||
"language {} not supported, use en instead.".format(language))
|
||||
|
@ -37,21 +42,43 @@ class StdTextDrawer(object):
|
|||
width = min(self.max_width, len(corpus) * self.height) + 4
|
||||
else:
|
||||
width = len(corpus) * self.height + 4
|
||||
bg = Image.new("RGB", (width, self.height), color=(127, 127, 127))
|
||||
draw = ImageDraw.Draw(bg)
|
||||
|
||||
char_x = 2
|
||||
font = self.font_dict[language]
|
||||
for i, char_i in enumerate(corpus):
|
||||
char_size = font.getsize(char_i)[0]
|
||||
draw.text((char_x, 2), char_i, fill=(0, 0, 0), font=font)
|
||||
char_x += char_size
|
||||
if char_x >= width:
|
||||
corpus = corpus[0:i + 1]
|
||||
self.logger.warning("corpus length exceed limit: {}".format(
|
||||
corpus))
|
||||
if style_input_width is not None:
|
||||
width = min(width, style_input_width)
|
||||
|
||||
corpus_list = []
|
||||
text_input_list = []
|
||||
|
||||
while len(corpus) != 0:
|
||||
bg = Image.new("RGB", (width, self.height), color=(127, 127, 127))
|
||||
draw = ImageDraw.Draw(bg)
|
||||
char_x = 2
|
||||
font = self.font_dict[language]
|
||||
i = 0
|
||||
while i < len(corpus):
|
||||
char_i = corpus[i]
|
||||
char_size = font.getsize(char_i)[0]
|
||||
# split when drawing the next char would reach the image width and index is not 0 (at least 1 char should be written on the image)
|
||||
if char_x + char_size >= width and i != 0:
|
||||
text_input = np.array(bg).astype(np.uint8)
|
||||
text_input = text_input[:, 0:char_x, :]
|
||||
|
||||
corpus_list.append(corpus[0:i])
|
||||
text_input_list.append(text_input)
|
||||
corpus = corpus[i:]
|
||||
break
|
||||
draw.text((char_x, 2), char_i, fill=(0, 0, 0), font=font)
|
||||
char_x += char_size
|
||||
|
||||
i += 1
|
||||
# the whole text is shorter than style input
|
||||
if i == len(corpus):
|
||||
text_input = np.array(bg).astype(np.uint8)
|
||||
text_input = text_input[:, 0:char_x, :]
|
||||
|
||||
corpus_list.append(corpus[0:i])
|
||||
text_input_list.append(text_input)
|
||||
corpus = corpus[i:]
|
||||
break
|
||||
|
||||
text_input = np.array(bg).astype(np.uint8)
|
||||
text_input = text_input[:, 0:char_x, :]
|
||||
return corpus, text_input
|
||||
return corpus_list, text_input_list
|
||||
|
|
|
@ -7,7 +7,6 @@ Global:
|
|||
save_epoch_step: 3
|
||||
# evaluation is run every 1000 iterations
|
||||
eval_batch_step: [0, 1000]
|
||||
# if pretrained_model is saved in static mode, load_static_weights must set to True
|
||||
cal_metric_during_train: True
|
||||
pretrained_model:
|
||||
checkpoints:
|
||||
|
@ -93,4 +92,4 @@ Eval:
|
|||
shuffle: False
|
||||
drop_last: False
|
||||
batch_size_per_card: 512
|
||||
num_workers: 4
|
||||
num_workers: 4
|
||||
|
|
|
@ -7,7 +7,10 @@ Global:
|
|||
save_epoch_step: 1200
|
||||
# evaluation is run every 2000 iterations after the 3000th iteration
|
||||
eval_batch_step: [3000, 2000]
|
||||
# if pretrained_model is saved in static mode, load_static_weights must set to True
|
||||
# 1. If pretrained_model is saved in static mode, such as classification pretrained model
|
||||
# from static branch, load_static_weights must be set as True.
|
||||
# 2. If you want to finetune the pretrained models we provide in the docs,
|
||||
# you should set load_static_weights as False.
|
||||
load_static_weights: True
|
||||
cal_metric_during_train: False
|
||||
pretrained_model: ./pretrain_models/MobileNetV3_large_x0_5_pretrained
|
||||
|
|
|
@ -7,7 +7,10 @@ Global:
|
|||
save_epoch_step: 1200
|
||||
# evaluation is run every 2000 iterations after the 3000th iteration
|
||||
eval_batch_step: [3000, 2000]
|
||||
# if pretrained_model is saved in static mode, load_static_weights must set to True
|
||||
# 1. If pretrained_model is saved in static mode, such as classification pretrained model
|
||||
# from static branch, load_static_weights must be set as True.
|
||||
# 2. If you want to finetune the pretrained models we provide in the docs,
|
||||
# you should set load_static_weights as False.
|
||||
load_static_weights: True
|
||||
cal_metric_during_train: False
|
||||
pretrained_model: ./pretrain_models/ResNet18_vd_pretrained
|
||||
|
|
|
@ -7,7 +7,10 @@ Global:
|
|||
save_epoch_step: 1200
|
||||
# evaluation is run every 2000 iterations
|
||||
eval_batch_step: [0, 2000]
|
||||
# if pretrained_model is saved in static mode, load_static_weights must set to True
|
||||
# 1. If pretrained_model is saved in static mode, such as classification pretrained model
|
||||
# from static branch, load_static_weights must be set as True.
|
||||
# 2. If you want to finetune the pretrained models we provide in the docs,
|
||||
# you should set load_static_weights as False.
|
||||
load_static_weights: True
|
||||
cal_metric_during_train: False
|
||||
pretrained_model: ./pretrain_models/MobileNetV3_large_x0_5_pretrained
|
||||
|
|
|
@ -7,7 +7,10 @@ Global:
|
|||
save_epoch_step: 1000
|
||||
# evaluation is run every 5000 iterations after the 4000th iteration
|
||||
eval_batch_step: [4000, 5000]
|
||||
# if pretrained_model is saved in static mode, load_static_weights must set to True
|
||||
# 1. If pretrained_model is saved in static mode, such as classification pretrained model
|
||||
# from static branch, load_static_weights must be set as True.
|
||||
# 2. If you want to finetune the pretrained models we provide in the docs,
|
||||
# you should set load_static_weights as False.
|
||||
load_static_weights: True
|
||||
cal_metric_during_train: False
|
||||
pretrained_model: ./pretrain_models/MobileNetV3_large_x0_5_pretrained
|
||||
|
|
|
@ -7,7 +7,10 @@ Global:
|
|||
save_epoch_step: 1200
|
||||
# evaluation is run every 2000 iterations
|
||||
eval_batch_step: [0,2000]
|
||||
# if pretrained_model is saved in static mode, load_static_weights must set to True
|
||||
# 1. If pretrained_model is saved in static mode, such as classification pretrained model
|
||||
# from static branch, load_static_weights must be set as True.
|
||||
# 2. If you want to finetune the pretrained models we provide in the docs,
|
||||
# you should set load_static_weights as False.
|
||||
load_static_weights: True
|
||||
cal_metric_during_train: False
|
||||
pretrained_model: ./pretrain_models/ResNet50_vd_ssld_pretrained
|
||||
|
|
|
@ -7,7 +7,10 @@ Global:
|
|||
save_epoch_step: 1000
|
||||
# evaluation is run every 5000 iterations after the 4000th iteration
|
||||
eval_batch_step: [4000, 5000]
|
||||
# if pretrained_model is saved in static mode, load_static_weights must set to True
|
||||
# 1. If pretrained_model is saved in static mode, such as classification pretrained model
|
||||
# from static branch, load_static_weights must be set as True.
|
||||
# 2. If you want to finetune the pretrained models we provide in the docs,
|
||||
# you should set load_static_weights as False.
|
||||
load_static_weights: True
|
||||
cal_metric_during_train: False
|
||||
pretrained_model: ./pretrain_models/ResNet50_vd_pretrained/
|
||||
|
|
|
@ -7,7 +7,10 @@ Global:
|
|||
save_epoch_step: 1000
|
||||
# evaluation is run every 5000 iterations after the 4000th iteration
|
||||
eval_batch_step: [4000, 5000]
|
||||
# if pretrained_model is saved in static mode, load_static_weights must set to True
|
||||
# 1. If pretrained_model is saved in static mode, such as classification pretrained model
|
||||
# from static branch, load_static_weights must be set as True.
|
||||
# 2. If you want to finetune the pretrained models we provide in the docs,
|
||||
# you should set load_static_weights as False.
|
||||
load_static_weights: True
|
||||
cal_metric_during_train: False
|
||||
pretrained_model: ./pretrain_models/ResNet50_vd_ssld_pretrained/
|
||||
|
|
|
@ -7,7 +7,10 @@ Global:
|
|||
save_epoch_step: 1000
|
||||
# evaluation is run every 5000 iterations after the 4000th iteration
|
||||
eval_batch_step: [4000, 5000]
|
||||
# if pretrained_model is saved in static mode, load_static_weights must set to True
|
||||
# 1. If pretrained_model is saved in static mode, such as classification pretrained model
|
||||
# from static branch, load_static_weights must be set as True.
|
||||
# 2. If you want to finetune the pretrained models we provide in the docs,
|
||||
# you should set load_static_weights as False.
|
||||
load_static_weights: True
|
||||
cal_metric_during_train: False
|
||||
pretrained_model: ./pretrain_models/ResNet50_vd_ssld_pretrained/
|
||||
|
|
|
@ -7,7 +7,6 @@ Global:
|
|||
save_epoch_step: 3
|
||||
# evaluation is run every 2000 iterations
|
||||
eval_batch_step: [0, 2000]
|
||||
# if pretrained_model is saved in static mode, load_static_weights must set to True
|
||||
cal_metric_during_train: True
|
||||
pretrained_model:
|
||||
checkpoints:
|
||||
|
|
|
@ -7,7 +7,6 @@ Global:
|
|||
save_epoch_step: 3
|
||||
# evaluation is run every 2000 iterations
|
||||
eval_batch_step: [0, 2000]
|
||||
# if pretrained_model is saved in static mode, load_static_weights must set to True
|
||||
cal_metric_during_train: True
|
||||
pretrained_model:
|
||||
checkpoints:
|
||||
|
|
|
@ -131,7 +131,7 @@ if __name__ == '__main__':
|
|||
if FLAGS.val:
|
||||
global_config['Eval']['dataset']['label_file_list'] = [FLAGS.val]
|
||||
eval_label_path = os.path.join(project_path,FLAGS.val)
|
||||
loss_file(Eval_label_path)
|
||||
loss_file(eval_label_path)
|
||||
if FLAGS.dict:
|
||||
global_config['Global']['character_dict_path'] = FLAGS.dict
|
||||
dict_path = os.path.join(project_path,FLAGS.dict)
|
||||
|
|
|
@ -16,7 +16,7 @@ Global:
|
|||
infer_img:
|
||||
# for data or label process
|
||||
character_dict_path: ppocr/utils/dict/en_dict.txt
|
||||
character_type: ch
|
||||
character_type: EN
|
||||
max_text_length: 25
|
||||
infer_mode: False
|
||||
use_space_char: False
|
||||
|
|
|
@ -7,7 +7,6 @@ Global:
|
|||
save_epoch_step: 3
|
||||
# evaluation is run every 2000 iterations
|
||||
eval_batch_step: [0, 2000]
|
||||
# if pretrained_model is saved in static mode, load_static_weights must set to True
|
||||
cal_metric_during_train: True
|
||||
pretrained_model:
|
||||
checkpoints:
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
Global:
|
||||
use_gpu: true
|
||||
use_gpu: True
|
||||
epoch_num: 72
|
||||
log_smooth_window: 20
|
||||
print_batch_step: 10
|
||||
|
@ -7,7 +7,6 @@ Global:
|
|||
save_epoch_step: 3
|
||||
# evaluation is run every 2000 iterations
|
||||
eval_batch_step: [0, 2000]
|
||||
# if pretrained_model is saved in static mode, load_static_weights must set to True
|
||||
cal_metric_during_train: True
|
||||
pretrained_model:
|
||||
checkpoints:
|
||||
|
@ -59,7 +58,7 @@ Metric:
|
|||
|
||||
Train:
|
||||
dataset:
|
||||
name: LMDBDateSet
|
||||
name: LMDBDataSet
|
||||
data_dir: ./train_data/data_lmdb_release/training/
|
||||
transforms:
|
||||
- DecodeImage: # load image
|
||||
|
@ -78,7 +77,7 @@ Train:
|
|||
|
||||
Eval:
|
||||
dataset:
|
||||
name: LMDBDateSet
|
||||
name: LMDBDataSet
|
||||
data_dir: ./train_data/data_lmdb_release/validation/
|
||||
transforms:
|
||||
- DecodeImage: # load image
|
||||
|
|
|
@ -7,7 +7,6 @@ Global:
|
|||
save_epoch_step: 3
|
||||
# evaluation is run every 2000 iterations
|
||||
eval_batch_step: [0, 2000]
|
||||
# if pretrained_model is saved in static mode, load_static_weights must set to True
|
||||
cal_metric_during_train: True
|
||||
pretrained_model:
|
||||
checkpoints:
|
||||
|
@ -58,7 +57,7 @@ Metric:
|
|||
|
||||
Train:
|
||||
dataset:
|
||||
name: LMDBDateSet
|
||||
name: LMDBDataSet
|
||||
data_dir: ./train_data/data_lmdb_release/training/
|
||||
transforms:
|
||||
- DecodeImage: # load image
|
||||
|
@ -77,7 +76,7 @@ Train:
|
|||
|
||||
Eval:
|
||||
dataset:
|
||||
name: LMDBDateSet
|
||||
name: LMDBDataSet
|
||||
data_dir: ./train_data/data_lmdb_release/validation/
|
||||
transforms:
|
||||
- DecodeImage: # load image
|
||||
|
|
|
@ -0,0 +1,101 @@
|
|||
Global:
|
||||
use_gpu: True
|
||||
epoch_num: 72
|
||||
log_smooth_window: 20
|
||||
print_batch_step: 10
|
||||
save_model_dir: ./output/rec/rec_mv3_tps_bilstm_att/
|
||||
save_epoch_step: 3
|
||||
# evaluation is run every 2000 iterations
|
||||
eval_batch_step: [0, 2000]
|
||||
cal_metric_during_train: True
|
||||
pretrained_model:
|
||||
checkpoints:
|
||||
save_inference_dir:
|
||||
use_visualdl: False
|
||||
infer_img: doc/imgs_words/ch/word_1.jpg
|
||||
# for data or label process
|
||||
character_dict_path:
|
||||
character_type: en
|
||||
max_text_length: 25
|
||||
infer_mode: False
|
||||
use_space_char: False
|
||||
|
||||
|
||||
Optimizer:
|
||||
name: Adam
|
||||
beta1: 0.9
|
||||
beta2: 0.999
|
||||
lr:
|
||||
learning_rate: 0.0005
|
||||
regularizer:
|
||||
name: 'L2'
|
||||
factor: 0.00001
|
||||
|
||||
Architecture:
|
||||
model_type: rec
|
||||
algorithm: RARE
|
||||
Transform:
|
||||
name: TPS
|
||||
num_fiducial: 20
|
||||
loc_lr: 0.1
|
||||
model_name: small
|
||||
Backbone:
|
||||
name: MobileNetV3
|
||||
scale: 0.5
|
||||
model_name: large
|
||||
Neck:
|
||||
name: SequenceEncoder
|
||||
encoder_type: rnn
|
||||
hidden_size: 96
|
||||
Head:
|
||||
name: AttentionHead
|
||||
hidden_size: 96
|
||||
|
||||
|
||||
Loss:
|
||||
name: AttentionLoss
|
||||
|
||||
PostProcess:
|
||||
name: AttnLabelDecode
|
||||
|
||||
Metric:
|
||||
name: RecMetric
|
||||
main_indicator: acc
|
||||
|
||||
Train:
|
||||
dataset:
|
||||
name: LMDBDataSet
|
||||
data_dir: ./train_data/data_lmdb_release/training/
|
||||
transforms:
|
||||
- DecodeImage: # load image
|
||||
img_mode: BGR
|
||||
channel_first: False
|
||||
- AttnLabelEncode: # Class handling label
|
||||
- RecResizeImg:
|
||||
image_shape: [3, 32, 100]
|
||||
- KeepKeys:
|
||||
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
||||
loader:
|
||||
shuffle: True
|
||||
batch_size_per_card: 256
|
||||
drop_last: True
|
||||
num_workers: 8
|
||||
|
||||
Eval:
|
||||
dataset:
|
||||
name: LMDBDataSet
|
||||
data_dir: ./train_data/data_lmdb_release/validation/
|
||||
transforms:
|
||||
- DecodeImage: # load image
|
||||
img_mode: BGR
|
||||
channel_first: False
|
||||
- AttnLabelEncode: # Class handling label
|
||||
- RecResizeImg:
|
||||
image_shape: [3, 32, 100]
|
||||
- KeepKeys:
|
||||
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
||||
loader:
|
||||
shuffle: False
|
||||
drop_last: False
|
||||
batch_size_per_card: 256
|
||||
num_workers: 1
|
|
@ -1,5 +1,5 @@
|
|||
Global:
|
||||
use_gpu: true
|
||||
use_gpu: True
|
||||
epoch_num: 72
|
||||
log_smooth_window: 20
|
||||
print_batch_step: 10
|
||||
|
@ -7,7 +7,6 @@ Global:
|
|||
save_epoch_step: 3
|
||||
# evaluation is run every 2000 iterations
|
||||
eval_batch_step: [0, 2000]
|
||||
# if pretrained_model is saved in static mode, load_static_weights must set to True
|
||||
cal_metric_during_train: True
|
||||
pretrained_model:
|
||||
checkpoints:
|
||||
|
@ -63,7 +62,7 @@ Metric:
|
|||
|
||||
Train:
|
||||
dataset:
|
||||
name: LMDBDateSet
|
||||
name: LMDBDataSet
|
||||
data_dir: ./train_data/data_lmdb_release/training/
|
||||
transforms:
|
||||
- DecodeImage: # load image
|
||||
|
@ -82,7 +81,7 @@ Train:
|
|||
|
||||
Eval:
|
||||
dataset:
|
||||
name: LMDBDateSet
|
||||
name: LMDBDataSet
|
||||
data_dir: ./train_data/data_lmdb_release/validation/
|
||||
transforms:
|
||||
- DecodeImage: # load image
|
||||
|
|
|
@ -7,7 +7,6 @@ Global:
|
|||
save_epoch_step: 3
|
||||
# evaluation is run every 2000 iterations
|
||||
eval_batch_step: [0, 2000]
|
||||
# if pretrained_model is saved in static mode, load_static_weights must set to True
|
||||
cal_metric_during_train: True
|
||||
pretrained_model:
|
||||
checkpoints:
|
||||
|
@ -58,7 +57,7 @@ Metric:
|
|||
|
||||
Train:
|
||||
dataset:
|
||||
name: LMDBDateSet
|
||||
name: LMDBDataSet
|
||||
data_dir: ./train_data/data_lmdb_release/training/
|
||||
transforms:
|
||||
- DecodeImage: # load image
|
||||
|
@ -77,7 +76,7 @@ Train:
|
|||
|
||||
Eval:
|
||||
dataset:
|
||||
name: LMDBDateSet
|
||||
name: LMDBDataSet
|
||||
data_dir: ./train_data/data_lmdb_release/validation/
|
||||
transforms:
|
||||
- DecodeImage: # load image
|
||||
|
|
|
@ -7,7 +7,6 @@ Global:
|
|||
save_epoch_step: 3
|
||||
# evaluation is run every 2000 iterations
|
||||
eval_batch_step: [0, 2000]
|
||||
# if pretrained_model is saved in static mode, load_static_weights must set to True
|
||||
cal_metric_during_train: True
|
||||
pretrained_model:
|
||||
checkpoints:
|
||||
|
@ -56,7 +55,7 @@ Metric:
|
|||
|
||||
Train:
|
||||
dataset:
|
||||
name: LMDBDateSet
|
||||
name: LMDBDataSet
|
||||
data_dir: ./train_data/data_lmdb_release/training/
|
||||
transforms:
|
||||
- DecodeImage: # load image
|
||||
|
@ -75,7 +74,7 @@ Train:
|
|||
|
||||
Eval:
|
||||
dataset:
|
||||
name: LMDBDateSet
|
||||
name: LMDBDataSet
|
||||
data_dir: ./train_data/data_lmdb_release/validation/
|
||||
transforms:
|
||||
- DecodeImage: # load image
|
||||
|
|
|
@ -0,0 +1,100 @@
|
|||
Global:
|
||||
use_gpu: True
|
||||
epoch_num: 400
|
||||
log_smooth_window: 20
|
||||
print_batch_step: 10
|
||||
save_model_dir: ./output/rec/b3_rare_r34_none_gru/
|
||||
save_epoch_step: 3
|
||||
# evaluation is run every 2000 iterations
|
||||
eval_batch_step: [0, 2000]
|
||||
cal_metric_during_train: True
|
||||
pretrained_model:
|
||||
checkpoints:
|
||||
save_inference_dir:
|
||||
use_visualdl: False
|
||||
infer_img: doc/imgs_words/ch/word_1.jpg
|
||||
# for data or label process
|
||||
character_dict_path:
|
||||
character_type: en
|
||||
max_text_length: 25
|
||||
infer_mode: False
|
||||
use_space_char: False
|
||||
|
||||
|
||||
Optimizer:
|
||||
name: Adam
|
||||
beta1: 0.9
|
||||
beta2: 0.999
|
||||
lr:
|
||||
learning_rate: 0.0005
|
||||
regularizer:
|
||||
name: 'L2'
|
||||
factor: 0.00000
|
||||
|
||||
Architecture:
|
||||
model_type: rec
|
||||
algorithm: RARE
|
||||
Transform:
|
||||
name: TPS
|
||||
num_fiducial: 20
|
||||
loc_lr: 0.1
|
||||
model_name: large
|
||||
Backbone:
|
||||
name: ResNet
|
||||
layers: 34
|
||||
Neck:
|
||||
name: SequenceEncoder
|
||||
encoder_type: rnn
|
||||
hidden_size: 256 #96
|
||||
Head:
|
||||
name: AttentionHead # AttentionHead
|
||||
hidden_size: 256 #
|
||||
l2_decay: 0.00001
|
||||
|
||||
Loss:
|
||||
name: AttentionLoss
|
||||
|
||||
PostProcess:
|
||||
name: AttnLabelDecode
|
||||
|
||||
Metric:
|
||||
name: RecMetric
|
||||
main_indicator: acc
|
||||
|
||||
Train:
|
||||
dataset:
|
||||
name: LMDBDataSet
|
||||
data_dir: ./train_data/data_lmdb_release/training/
|
||||
transforms:
|
||||
- DecodeImage: # load image
|
||||
img_mode: BGR
|
||||
channel_first: False
|
||||
- AttnLabelEncode: # Class handling label
|
||||
- RecResizeImg:
|
||||
image_shape: [3, 32, 100]
|
||||
- KeepKeys:
|
||||
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
||||
loader:
|
||||
shuffle: True
|
||||
batch_size_per_card: 256
|
||||
drop_last: True
|
||||
num_workers: 8
|
||||
|
||||
Eval:
|
||||
dataset:
|
||||
name: LMDBDataSet
|
||||
data_dir: ./train_data/data_lmdb_release/validation/
|
||||
transforms:
|
||||
- DecodeImage: # load image
|
||||
img_mode: BGR
|
||||
channel_first: False
|
||||
- AttnLabelEncode: # Class handling label
|
||||
- RecResizeImg:
|
||||
image_shape: [3, 32, 100]
|
||||
- KeepKeys:
|
||||
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
||||
loader:
|
||||
shuffle: False
|
||||
drop_last: False
|
||||
batch_size_per_card: 256
|
||||
num_workers: 8
|
|
@ -7,7 +7,6 @@ Global:
|
|||
save_epoch_step: 3
|
||||
# evaluation is run every 2000 iterations
|
||||
eval_batch_step: [0, 2000]
|
||||
# if pretrained_model is saved in static mode, load_static_weights must set to True
|
||||
cal_metric_during_train: True
|
||||
pretrained_model:
|
||||
checkpoints:
|
||||
|
@ -62,7 +61,7 @@ Metric:
|
|||
|
||||
Train:
|
||||
dataset:
|
||||
name: LMDBDateSet
|
||||
name: LMDBDataSet
|
||||
data_dir: ./train_data/data_lmdb_release/training/
|
||||
transforms:
|
||||
- DecodeImage: # load image
|
||||
|
@ -81,7 +80,7 @@ Train:
|
|||
|
||||
Eval:
|
||||
dataset:
|
||||
name: LMDBDateSet
|
||||
name: LMDBDataSet
|
||||
data_dir: ./train_data/data_lmdb_release/validation/
|
||||
transforms:
|
||||
- DecodeImage: # load image
|
||||
|
|
|
@ -0,0 +1,106 @@
|
|||
Global:
|
||||
use_gpu: True
|
||||
epoch_num: 72
|
||||
log_smooth_window: 20
|
||||
print_batch_step: 5
|
||||
save_model_dir: ./output/rec/srn_new
|
||||
save_epoch_step: 3
|
||||
# evaluation is run every 5000 iterations
|
||||
eval_batch_step: [0, 5000]
|
||||
cal_metric_during_train: True
|
||||
pretrained_model:
|
||||
checkpoints:
|
||||
save_inference_dir:
|
||||
use_visualdl: False
|
||||
infer_img: doc/imgs_words/ch/word_1.jpg
|
||||
# for data or label process
|
||||
character_dict_path:
|
||||
character_type: en
|
||||
max_text_length: 25
|
||||
num_heads: 8
|
||||
infer_mode: False
|
||||
use_space_char: False
|
||||
|
||||
|
||||
Optimizer:
|
||||
name: Adam
|
||||
beta1: 0.9
|
||||
beta2: 0.999
|
||||
clip_norm: 10.0
|
||||
lr:
|
||||
learning_rate: 0.0001
|
||||
|
||||
Architecture:
|
||||
model_type: rec
|
||||
algorithm: SRN
|
||||
in_channels: 1
|
||||
Transform:
|
||||
Backbone:
|
||||
name: ResNetFPN
|
||||
Head:
|
||||
name: SRNHead
|
||||
max_text_length: 25
|
||||
num_heads: 8
|
||||
num_encoder_TUs: 2
|
||||
num_decoder_TUs: 4
|
||||
hidden_dims: 512
|
||||
|
||||
Loss:
|
||||
name: SRNLoss
|
||||
|
||||
PostProcess:
|
||||
name: SRNLabelDecode
|
||||
|
||||
Metric:
|
||||
name: RecMetric
|
||||
main_indicator: acc
|
||||
|
||||
Train:
|
||||
dataset:
|
||||
name: LMDBDataSet
|
||||
data_dir: ./train_data/data_lmdb_release/training/
|
||||
transforms:
|
||||
- DecodeImage: # load image
|
||||
img_mode: BGR
|
||||
channel_first: False
|
||||
- SRNLabelEncode: # Class handling label
|
||||
- SRNRecResizeImg:
|
||||
image_shape: [1, 64, 256]
|
||||
- KeepKeys:
|
||||
keep_keys: ['image',
|
||||
'label',
|
||||
'length',
|
||||
'encoder_word_pos',
|
||||
'gsrm_word_pos',
|
||||
'gsrm_slf_attn_bias1',
|
||||
'gsrm_slf_attn_bias2'] # dataloader will return list in this order
|
||||
loader:
|
||||
shuffle: False
|
||||
batch_size_per_card: 64
|
||||
drop_last: False
|
||||
num_workers: 4
|
||||
|
||||
Eval:
|
||||
dataset:
|
||||
name: LMDBDataSet
|
||||
data_dir: ./train_data/data_lmdb_release/validation/
|
||||
transforms:
|
||||
- DecodeImage: # load image
|
||||
img_mode: BGR
|
||||
channel_first: False
|
||||
- SRNLabelEncode: # Class handling label
|
||||
- SRNRecResizeImg:
|
||||
image_shape: [1, 64, 256]
|
||||
- KeepKeys:
|
||||
keep_keys: ['image',
|
||||
'label',
|
||||
'length',
|
||||
'encoder_word_pos',
|
||||
'gsrm_word_pos',
|
||||
'gsrm_slf_attn_bias1',
|
||||
'gsrm_slf_attn_bias2']
|
||||
loader:
|
||||
shuffle: False
|
||||
drop_last: False
|
||||
batch_size_per_card: 32
|
||||
num_workers: 4
|
|
@ -133,7 +133,11 @@ if(WITH_MKL)
|
|||
endif ()
|
||||
endif()
|
||||
else()
|
||||
set(MATH_LIB ${PADDLE_LIB}/third_party/install/openblas/lib/libopenblas${CMAKE_STATIC_LIBRARY_SUFFIX})
|
||||
if (WIN32)
|
||||
set(MATH_LIB ${PADDLE_LIB}/third_party/install/openblas/lib/openblas${CMAKE_STATIC_LIBRARY_SUFFIX})
|
||||
else ()
|
||||
set(MATH_LIB ${PADDLE_LIB}/third_party/install/openblas/lib/libopenblas${CMAKE_STATIC_LIBRARY_SUFFIX})
|
||||
endif ()
|
||||
endif()
|
||||
|
||||
# Note: libpaddle_inference_api.so/a must put before libpaddle_fluid.so/a
|
||||
|
@ -157,7 +161,7 @@ endif(WITH_STATIC_LIB)
|
|||
|
||||
if (NOT WIN32)
|
||||
set(DEPS ${DEPS}
|
||||
${MATH_LIB} ${MKLDNN_LIB}
|
||||
${MATH_LIB} ${MKLDNN_LIB}
|
||||
glog gflags protobuf z xxhash
|
||||
)
|
||||
if(EXISTS "${PADDLE_LIB}/third_party/install/snappystream/lib")
|
||||
|
|
|
@ -14,7 +14,7 @@ PaddleOCR在Windows 平台下基于`Visual Studio 2019 Community` 进行了测
|
|||
|
||||
### Step1: 下载PaddlePaddle C++ 预测库 fluid_inference
|
||||
|
||||
PaddlePaddle C++ 预测库针对不同的`CPU`和`CUDA`版本提供了不同的预编译版本,请根据实际情况下载: [C++预测库下载列表](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/advanced_guide/inference_deployment/inference/windows_cpp_inference.html)
|
||||
PaddlePaddle C++ 预测库针对不同的`CPU`和`CUDA`版本提供了不同的预编译版本,请根据实际情况下载: [C++预测库下载列表](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/guides/05_inference_deployment/inference/windows_cpp_inference.html)
|
||||
|
||||
解压后`D:\projects\fluid_inference`目录包含内容为:
|
||||
```
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
# 服务器端C++预测
|
||||
|
||||
本教程将介绍在服务器端部署PaddleOCR超轻量中文检测、识别模型的详细步骤。
|
||||
本章节介绍PaddleOCR 模型的C++部署方法,与之对应的python预测部署方式参考[文档](../../doc/doc_ch/inference.md)。
|
||||
C++在性能计算上优于python,因此,在大多数CPU、GPU部署场景,多采用C++的部署方式,本节将介绍如何在Linux\Windows (CPU\GPU)环境下配置C++环境并完成
|
||||
PaddleOCR模型部署。
|
||||
|
||||
|
||||
## 1. 准备环境
|
||||
|
@ -72,9 +74,21 @@ opencv3/
|
|||
|
||||
* 有2种方式获取Paddle预测库,下面进行详细介绍。
|
||||
|
||||
#### 1.2.1 预测库源码编译
|
||||
#### 1.2.1 直接下载安装
|
||||
|
||||
* [Paddle预测库官网](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/guides/05_inference_deployment/inference/build_and_install_lib_cn.html)上提供了不同cuda版本的Linux预测库,可以在官网查看并选择合适的预测库版本。
|
||||
|
||||
* 下载之后使用下面的方法解压。
|
||||
|
||||
```
|
||||
tar -xf paddle_inference.tgz
|
||||
```
|
||||
|
||||
最终会在当前的文件夹中生成`paddle_inference/`的子文件夹。
|
||||
|
||||
#### 1.2.2 预测库源码编译
|
||||
* 如果希望获取最新预测库特性,可以从Paddle github上克隆最新代码,源码编译预测库。
|
||||
* 可以参考[Paddle预测库官网](https://www.paddlepaddle.org.cn/documentation/docs/zh/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html)的说明,从github上获取Paddle代码,然后进行编译,生成最新的预测库。使用git获取代码方法如下。
|
||||
* 可以参考[Paddle预测库官网](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/guides/05_inference_deployment/inference/build_and_install_lib_cn.html)的说明,从github上获取Paddle代码,然后进行编译,生成最新的预测库。使用git获取代码方法如下。
|
||||
|
||||
```shell
|
||||
git clone https://github.com/PaddlePaddle/Paddle.git
|
||||
|
@ -100,7 +114,7 @@ make -j
|
|||
make inference_lib_dist
|
||||
```
|
||||
|
||||
更多编译参数选项可以参考Paddle C++预测库官网:[https://www.paddlepaddle.org.cn/documentation/docs/zh/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html](https://www.paddlepaddle.org.cn/documentation/docs/zh/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html)。
|
||||
更多编译参数选项可以参考Paddle C++预测库官网:[https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/guides/05_inference_deployment/inference/build_and_install_lib_cn.html](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/guides/05_inference_deployment/inference/build_and_install_lib_cn.html)。
|
||||
|
||||
|
||||
* 编译完成之后,可以在`build/paddle_inference_install_dir/`文件下看到生成了以下文件及文件夹。
|
||||
|
@ -115,17 +129,7 @@ build/paddle_inference_install_dir/
|
|||
|
||||
其中`paddle`就是C++预测所需的Paddle库,`version.txt`中包含当前预测库的版本信息。
|
||||
|
||||
#### 1.2.2 直接下载安装
|
||||
|
||||
* [Paddle预测库官网](https://www.paddlepaddle.org.cn/documentation/docs/zh/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html)上提供了不同cuda版本的Linux预测库,可以在官网查看并选择合适的预测库版本。
|
||||
|
||||
* 下载之后使用下面的方法解压。
|
||||
|
||||
```
|
||||
tar -xf paddle_inference.tgz
|
||||
```
|
||||
|
||||
最终会在当前的文件夹中生成`paddle_inference/`的子文件夹。
|
||||
|
||||
|
||||
## 2 开始运行
|
||||
|
@ -223,7 +227,7 @@ char_list_file ../../ppocr/utils/ppocr_keys_v1.txt # 字典文件
|
|||
visualize 1 # 是否对结果进行可视化,为1时,会在当前文件夹下保存文件名为`ocr_vis.png`的预测结果。
|
||||
```
|
||||
|
||||
* PaddleOCR也支持多语言的预测,更多细节可以参考[识别文档](../../doc/doc_ch/recognition.md)中的多语言字典与模型部分。
|
||||
* PaddleOCR也支持多语言的预测,更多支持的语言和模型可以参考[识别文档](../../doc/doc_ch/recognition.md)中的多语言字典与模型部分,如果希望进行多语言预测,只需修改`tools/config.txt`中的`char_list_file`(字典文件路径)以及`rec_model_dir`(inference模型路径)字段即可。
|
||||
|
||||
最终屏幕上会输出检测结果如下。
|
||||
|
||||
|
@ -234,4 +238,4 @@ visualize 1 # 是否对结果进行可视化,为1时,会在当前文件夹
|
|||
|
||||
### 2.3 注意
|
||||
|
||||
* 在使用Paddle预测库时,推荐使用2.0.0-beta0版本的预测库。
|
||||
* 在使用Paddle预测库时,推荐使用2.0.0版本的预测库。
|
||||
|
|
|
@ -1,7 +1,9 @@
|
|||
# Server-side C++ inference
|
||||
|
||||
|
||||
In this tutorial, we will introduce the detailed steps of deploying PaddleOCR ultra-lightweight Chinese detection and recognition models on the server side.
|
||||
This chapter introduces the C++ deployment method of the PaddleOCR model; for the corresponding Python inference deployment method, refer to this [document](../../doc/doc_ch/inference.md).
|
||||
C++ outperforms Python in computational performance. Therefore, in most CPU and GPU deployment scenarios, C++ deployment is preferred.
|
||||
This section will introduce how to configure the C++ environment in the Linux\Windows (CPU\GPU) environment and complete
|
||||
PaddleOCR model deployment.
|
||||
|
||||
|
||||
## 1. Prepare the environment
|
||||
|
@ -74,10 +76,23 @@ opencv3/
|
|||
|
||||
* There are 2 ways to obtain the Paddle inference library, described in detail below.
|
||||
|
||||
#### 1.2.1 Direct download and installation
|
||||
|
||||
#### 1.2.1 Compile from the source code
|
||||
* Different cuda versions of the Linux inference library (based on GCC 4.8.2) are provided on the
|
||||
[Paddle inference library official website](https://www.paddlepaddle.org.cn/documentation/docs/en/develop/guides/05_inference_deployment/inference/build_and_install_lib_en.html). You can view and select the appropriate version of the inference library on the official website.
|
||||
|
||||
|
||||
* After downloading, use the following method to uncompress.
|
||||
|
||||
```
|
||||
tar -xf paddle_inference.tgz
|
||||
```
|
||||
|
||||
Finally, the `paddle_inference/` subfolder will be generated in the current folder.
|
||||
|
||||
#### 1.2.2 Compile from the source code
|
||||
* If you want to get the latest Paddle inference library features, you can download the latest code from Paddle github repository and compile the inference library from the source code.
|
||||
* You can refer to [Paddle inference library] (https://www.paddlepaddle.org.cn/documentation/docs/en/advanced_guide/inference_deployment/inference/build_and_install_lib_en.html) to get the Paddle source code from github, and then compile To generate the latest inference library. The method of using git to access the code is as follows.
|
||||
* You can refer to [Paddle inference library](https://www.paddlepaddle.org.cn/documentation/docs/en/develop/guides/05_inference_deployment/inference/build_and_install_lib_en.html) to get the Paddle source code from github, and then compile it to generate the latest inference library. The method of using git to access the code is as follows.
|
||||
|
||||
|
||||
```shell
|
||||
|
@ -104,7 +119,7 @@ make -j
|
|||
make inference_lib_dist
|
||||
```
|
||||
|
||||
For more compilation parameter options, please refer to the official website of the Paddle C++ inference library:[https://www.paddlepaddle.org.cn/documentation/docs/en/advanced_guide/inference_deployment/inference/build_and_install_lib_en.html](https://www.paddlepaddle.org.cn/documentation/docs/en/advanced_guide/inference_deployment/inference/build_and_install_lib_en.html).
|
||||
For more compilation parameter options, please refer to the official website of the Paddle C++ inference library:[https://www.paddlepaddle.org.cn/documentation/docs/en/develop/guides/05_inference_deployment/inference/build_and_install_lib_en.html](https://www.paddlepaddle.org.cn/documentation/docs/en/develop/guides/05_inference_deployment/inference/build_and_install_lib_en.html).
|
||||
|
||||
|
||||
* After the compilation process, you can see the following files in the folder of `build/paddle_inference_install_dir/`.
|
||||
|
@ -120,22 +135,6 @@ build/paddle_inference_install_dir/
|
|||
Among them, `paddle` is the Paddle library required for C++ prediction later, and `version.txt` contains the version information of the current inference library.
|
||||
|
||||
|
||||
|
||||
#### 1.2.2 Direct download and installation
|
||||
|
||||
* Different cuda versions of the Linux inference library (based on GCC 4.8.2) are provided on the
|
||||
[Paddle inference library official website](https://www.paddlepaddle.org.cn/documentation/docs/en/advanced_guide/inference_deployment/inference/build_and_install_lib_en.html). You can view and select the appropriate version of the inference library on the official website.
|
||||
|
||||
|
||||
* After downloading, use the following method to uncompress.
|
||||
|
||||
```
|
||||
tar -xf paddle_inference.tgz
|
||||
```
|
||||
|
||||
Finally you can see the following files in the folder of `paddle_inference/`.
|
||||
|
||||
|
||||
## 2. Compile and run the demo
|
||||
|
||||
### 2.1 Export the inference model
|
||||
|
@ -233,7 +232,7 @@ char_list_file ../../ppocr/utils/ppocr_keys_v1.txt # dictionary file
|
|||
visualize 1 # Whether to visualize the results. When it is set to 1, the prediction result will be saved in the image file `./ocr_vis.png`.
|
||||
```
|
||||
|
||||
* Multi-language inference is also supported in PaddleOCR, for more details, please refer to part of multi-language dictionaries and models in [recognition tutorial](../../doc/doc_en/recognition_en.md).
|
||||
* Multi-language inference is also supported in PaddleOCR, you can refer to [recognition tutorial](../../doc/doc_en/recognition_en.md) for more supported languages and models in PaddleOCR. Specifically, if you want to infer using multi-language models, you just need to modify values of `char_list_file` and `rec_model_dir` in file `tools/config.txt`.
|
||||
|
||||
|
||||
The detection results will be shown on the screen, which is as follows.
|
||||
|
@ -245,4 +244,4 @@ The detection results will be shown on the screen, which is as follows.
|
|||
|
||||
### 2.3 Notes
|
||||
|
||||
* Paddle2.0.0-beta0 inference model library is recommended for this toturial.
|
||||
* Paddle2.0.0 inference model library is recommended for this tutorial.
|
||||
|
|
|
@ -50,6 +50,11 @@ int main(int argc, char **argv) {
|
|||
|
||||
cv::Mat srcimg = cv::imread(img_path, cv::IMREAD_COLOR);
|
||||
|
||||
if (!srcimg.data) {
|
||||
std::cerr << "[ERROR] image read failed! image path: " << img_path << "\n";
|
||||
exit(1);
|
||||
}
|
||||
|
||||
DBDetector det(config.det_model_dir, config.use_gpu, config.gpu_id,
|
||||
config.gpu_mem, config.cpu_math_library_num_threads,
|
||||
config.use_mkldnn, config.max_side_len, config.det_db_thresh,
|
||||
|
|
|
@ -9,7 +9,7 @@ use_mkldnn 0
|
|||
max_side_len 960
|
||||
det_db_thresh 0.3
|
||||
det_db_box_thresh 0.5
|
||||
det_db_unclip_ratio 2.0
|
||||
det_db_unclip_ratio 1.6
|
||||
det_model_dir ./inference/ch_ppocr_mobile_v2.0_det_infer/
|
||||
|
||||
# cls config
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
# Version: 2.0.0
|
||||
FROM registry.baidubce.com/paddlepaddle/paddle:2.0.0rc1
|
||||
FROM registry.baidubce.com/paddlepaddle/paddle:2.0.0
|
||||
|
||||
# PaddleOCR base on Python3.7
|
||||
RUN pip3.7 install --upgrade pip -i https://mirror.baidu.com/pypi/simple
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
# Version: 2.0.0
|
||||
FROM egistry.baidubce.com/paddlepaddle/paddle:2.0.0rc1-gpu-cuda10.0-cudnn7
|
||||
FROM registry.baidubce.com/paddlepaddle/paddle:2.0.0-gpu-cuda10.1-cudnn7
|
||||
|
||||
# PaddleOCR base on Python3.7
|
||||
RUN pip3.7 install --upgrade pip -i https://mirror.baidu.com/pypi/simple
|
||||
|
|
|
@ -20,7 +20,8 @@ def read_params():
|
|||
#DB parmas
|
||||
cfg.det_db_thresh = 0.3
|
||||
cfg.det_db_box_thresh = 0.5
|
||||
cfg.det_db_unclip_ratio = 2.0
|
||||
cfg.det_db_unclip_ratio = 1.6
|
||||
cfg.use_dilation = False
|
||||
|
||||
# #EAST parmas
|
||||
# cfg.det_east_score_thresh = 0.8
|
||||
|
|
|
@ -20,7 +20,8 @@ def read_params():
|
|||
#DB parmas
|
||||
cfg.det_db_thresh = 0.3
|
||||
cfg.det_db_box_thresh = 0.5
|
||||
cfg.det_db_unclip_ratio = 2.0
|
||||
cfg.det_db_unclip_ratio = 1.6
|
||||
cfg.use_dilation = False
|
||||
|
||||
#EAST parmas
|
||||
cfg.det_east_score_thresh = 0.8
|
||||
|
|
|
@ -0,0 +1,64 @@
|
|||
|
||||
## 介绍
|
||||
|
||||
复杂的模型有利于提高模型的性能,但也导致模型中存在一定冗余,模型裁剪通过移出网络模型中的子模型来减少这种冗余,达到减少模型计算复杂度,提高模型推理性能的目的。
|
||||
本教程将介绍如何使用飞桨模型压缩库PaddleSlim做PaddleOCR模型的压缩。
|
||||
[PaddleSlim](https://github.com/PaddlePaddle/PaddleSlim)集成了模型剪枝、量化(包括量化训练和离线量化)、蒸馏和神经网络搜索等多种业界常用且领先的模型压缩功能,如果您感兴趣,可以关注并了解。
|
||||
|
||||
|
||||
在开始本教程之前,建议先了解:
|
||||
1. [PaddleOCR模型的训练方法](../../../doc/doc_ch/quickstart.md)
|
||||
2. [模型裁剪教程](https://github.com/PaddlePaddle/PaddleSlim/blob/release%2F2.0.0/docs/zh_cn/tutorials/pruning/dygraph/filter_pruning.md)
|
||||
|
||||
|
||||
## 快速开始
|
||||
|
||||
模型裁剪主要包括四个步骤:
|
||||
1. 安装 PaddleSlim
|
||||
2. 准备训练好的模型
|
||||
3. 敏感度分析、裁剪训练
|
||||
4. 导出模型、预测部署
|
||||
|
||||
### 1. 安装PaddleSlim
|
||||
|
||||
```bash
|
||||
git clone https://github.com/PaddlePaddle/PaddleSlim.git
|
||||
cd PaddleSlim
|
||||
git checkout develop
|
||||
python3 setup.py install
|
||||
```
|
||||
|
||||
### 2. 获取预训练模型
|
||||
模型裁剪需要加载事先训练好的模型,PaddleOCR也提供了一系列[模型](../../../doc/doc_ch/models_list.md),开发者可根据需要自行选择模型或使用自己的模型。
|
||||
|
||||
### 3. 敏感度分析训练
|
||||
|
||||
加载预训练模型后,通过对现有模型的每个网络层进行敏感度分析,得到敏感度文件:sen.pickle,可以通过PaddleSlim提供的[接口](https://github.com/PaddlePaddle/PaddleSlim/blob/9b01b195f0c4bc34a1ab434751cb260e13d64d9e/paddleslim/dygraph/prune/filter_pruner.py#L75)加载文件,获得各网络层在不同裁剪比例下的精度损失。从而了解各网络层冗余度,决定每个网络层的裁剪比例。
|
||||
敏感度文件内容格式:
|
||||
sen.pickle(Dict){
|
||||
'layer_weight_name_0': sens_of_each_ratio(Dict){'pruning_ratio_0': acc_loss, 'pruning_ratio_1': acc_loss}
|
||||
'layer_weight_name_1': sens_of_each_ratio(Dict){'pruning_ratio_0': acc_loss, 'pruning_ratio_1': acc_loss}
|
||||
}
|
||||
|
||||
例子:
|
||||
{
|
||||
'conv10_expand_weights': {0.1: 0.006509952684312718, 0.2: 0.01827734339798862, 0.3: 0.014528405644659832, 0.6: 0.06536008804270439, 0.8: 0.11798612250664964, 0.7: 0.12391408417493704, 0.4: 0.030615754498018757, 0.5: 0.047105205602406594}
|
||||
'conv10_linear_weights': {0.1: 0.05113190831455035, 0.2: 0.07705573833558801, 0.3: 0.12096721757739311, 0.6: 0.5135061352930738, 0.8: 0.7908166677143281, 0.7: 0.7272187676899062, 0.4: 0.1819252083008504, 0.5: 0.3728054727792405}
|
||||
}
|
||||
加载敏感度文件后会返回一个字典,字典中的keys为网络模型参数模型的名字,values为一个字典,里面保存了相应网络层的裁剪敏感度信息。例如在例子中,conv10_expand_weights所对应的网络层在裁掉10%的卷积核后模型性能相较原模型会下降0.65%,详细信息可见[PaddleSlim](https://github.com/PaddlePaddle/PaddleSlim/blob/develop/docs/zh_cn/algo/algo.md#2-%E5%8D%B7%E7%A7%AF%E6%A0%B8%E5%89%AA%E8%A3%81%E5%8E%9F%E7%90%86)
|
||||
|
||||
进入PaddleOCR根目录,通过以下命令对模型进行敏感度分析训练:
|
||||
```bash
|
||||
python3.7 deploy/slim/prune/sensitivity_anal.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.pretrain_weights="your trained model"
|
||||
```
|
||||
|
||||
### 4. 导出模型、预测部署
|
||||
|
||||
在得到裁剪训练保存的模型后,我们可以将其导出为inference_model:
|
||||
```bash
|
||||
python3.7 deploy/slim/prune/export_prune_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.pretrain_weights=./output/det_db/best_accuracy Global.save_inference_dir=inference_model
|
||||
```
|
||||
|
||||
inference model的预测和部署参考:
|
||||
1. [inference model python端预测](../../../doc/doc_ch/inference.md)
|
||||
2. [inference model C++预测](../../cpp_infer/readme.md)
|
|
@ -0,0 +1,71 @@
|
|||
|
||||
## Introduction
|
||||
|
||||
Generally, a more complex model achieves better performance on a task, but it also leads to some redundancy in the model. Model pruning is a technique that reduces this redundancy by removing sub-models from the neural network, so as to reduce the model's computational complexity and improve its inference performance.
|
||||
|
||||
This example uses the [pruning APIs](https://paddlepaddle.github.io/PaddleSlim/api/prune_api/) provided by PaddleSlim to compress the OCR model.
|
||||
[PaddleSlim](https://github.com/PaddlePaddle/PaddleSlim) is an open-source library that integrates model pruning, quantization (including quantization training and offline quantization), distillation, neural network architecture search, and many other commonly used and leading model compression techniques in the industry.
|
||||
|
||||
It is recommended that you read the following pages before working through this example:
|
||||
1. [PaddleOCR training methods](../../../doc/doc_ch/quickstart.md)
|
||||
2. [The demo of prune](https://github.com/PaddlePaddle/PaddleSlim/blob/release%2F2.0.0/docs/zh_cn/tutorials/pruning/dygraph/filter_pruning.md)
|
||||
|
||||
## Quick start
|
||||
|
||||
Four steps for OCR model pruning:
|
||||
1. Install PaddleSlim
|
||||
2. Prepare the trained model
|
||||
3. Sensitivity analysis and tailoring training
|
||||
4. Export model, predict deployment
|
||||
|
||||
### 1. Install PaddleSlim
|
||||
|
||||
```bash
|
||||
git clone https://github.com/PaddlePaddle/PaddleSlim.git
|
||||
cd PaddleSlim
|
||||
git checkout develop
|
||||
python3 setup.py install
|
||||
```
|
||||
|
||||
|
||||
### 2. Download Pretrain Model
|
||||
Model prune needs to load pre-trained models.
|
||||
PaddleOCR also provides a series of [models](../../../doc/doc_en/models_list_en.md). Developers can choose one of these models or use their own models according to their needs.
|
||||
|
||||
|
||||
### 3. Pruning sensitivity analysis
|
||||
|
||||
After the pre-trained model is loaded, sensitivity analysis is performed on each network layer of the model to understand the redundancy of each layer, and the results are saved to a sensitivity file named `sen.pickle`. Afterwards, users can load the sensitivity file via the [methods provided by PaddleSlim](https://github.com/PaddlePaddle/PaddleSlim/blob/develop/paddleslim/prune/sensitive.py#L221) and determine the pruning ratio of each network layer automatically. For specific details of sensitivity analysis, see: [Sensitivity analysis](https://github.com/PaddlePaddle/PaddleSlim/blob/develop/docs/zh_cn/tutorials/image_classification_sensitivity_analysis_tutorial.md)
|
||||
The data format of sensitivity file:
|
||||
sen.pickle(Dict){
|
||||
'layer_weight_name_0': sens_of_each_ratio(Dict){'pruning_ratio_0': acc_loss, 'pruning_ratio_1': acc_loss}
|
||||
'layer_weight_name_1': sens_of_each_ratio(Dict){'pruning_ratio_0': acc_loss, 'pruning_ratio_1': acc_loss}
|
||||
}
|
||||
|
||||
example:
|
||||
{
|
||||
'conv10_expand_weights': {0.1: 0.006509952684312718, 0.2: 0.01827734339798862, 0.3: 0.014528405644659832, 0.6: 0.06536008804270439, 0.8: 0.11798612250664964, 0.7: 0.12391408417493704, 0.4: 0.030615754498018757, 0.5: 0.047105205602406594}
|
||||
'conv10_linear_weights': {0.1: 0.05113190831455035, 0.2: 0.07705573833558801, 0.3: 0.12096721757739311, 0.6: 0.5135061352930738, 0.8: 0.7908166677143281, 0.7: 0.7272187676899062, 0.4: 0.1819252083008504, 0.5: 0.3728054727792405}
|
||||
}
|
||||
The function returns a dict after loading the sensitivity file. The keys of the dict are the names of the parameters in each layer, and each value holds the pruning sensitivity information of the corresponding layer. In the example, pruning 10% of the filters of the layer corresponding to conv10_expand_weights would lead to a 0.65% degradation of model performance. The details can be seen at: [Sensitivity analysis](https://github.com/PaddlePaddle/PaddleSlim/blob/develop/docs/zh_cn/algo/algo.md#2-%E5%8D%B7%E7%A7%AF%E6%A0%B8%E5%89%AA%E8%A3%81%E5%8E%9F%E7%90%86)
|
||||
|
||||
|
||||
Enter the PaddleOCR root directory and perform sensitivity analysis on the model with the following command:
|
||||
|
||||
```bash
|
||||
|
||||
python3.7 deploy/slim/prune/sensitivity_anal.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.pretrain_weights="your trained model"
|
||||
|
||||
```
|
||||
|
||||
|
||||
### 4. Export inference model and deploy it
|
||||
|
||||
We can export the pruned model as inference_model for deployment:
|
||||
```bash
|
||||
python deploy/slim/prune/export_prune_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.pretrain_weights=./output/det_db/best_accuracy Global.test_batch_size_per_card=1 Global.save_inference_dir=inference_model
|
||||
```
|
||||
|
||||
Reference for prediction and deployment of inference model:
|
||||
1. [inference model python prediction](../../../doc/doc_en/inference_en.md)
|
||||
2. [inference model C++ prediction](../../cpp_infer/readme_en.md)
|
|
@ -0,0 +1,125 @@
|
|||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
__dir__ = os.path.dirname(__file__)
|
||||
sys.path.append(__dir__)
|
||||
sys.path.append(os.path.join(__dir__, '..', '..', '..'))
|
||||
sys.path.append(os.path.join(__dir__, '..', '..', '..', 'tools'))
|
||||
|
||||
import paddle
|
||||
from ppocr.data import build_dataloader
|
||||
from ppocr.modeling.architectures import build_model
|
||||
|
||||
from ppocr.postprocess import build_post_process
|
||||
from ppocr.metrics import build_metric
|
||||
from ppocr.utils.save_load import init_model
|
||||
import tools.program as program
|
||||
|
||||
|
||||
def main(config, device, logger, vdl_writer):
    """Prune a PaddleOCR model with FPGM filter pruning and export it for inference.

    The steps are: build the eval dataloader, post-processor, model and metric;
    run (or reload from ``./sen.pickle``) the sensitivity analysis; prune the
    model; load the trained weights; evaluate once; then convert the model to a
    static graph and save it under ``Global.save_inference_dir``.

    Args:
        config: Parsed configuration mapping (reads the ``Global``,
            ``PostProcess``, ``Architecture`` and ``Metric`` sections).
        device: Device handle forwarded to ``build_dataloader``.
        logger: Logger used for progress and result messages.
        vdl_writer: Accepted for signature parity with the other entry
            points; not used here.
    """
    global_config = config['Global']

    # Evaluation data and the post-processor drive the sensitivity metric.
    valid_dataloader = build_dataloader(config, 'Eval', device, logger)
    post_process_class = build_post_process(config['PostProcess'],
                                            global_config)

    # For recognition algorithms the head width follows the character set size.
    arch_config = config['Architecture']
    if hasattr(post_process_class, 'character'):
        arch_config["Head"]['out_channels'] = len(post_process_class.character)
    model = build_model(arch_config)

    flops = paddle.flops(model, [1, 3, 640, 640])
    logger.info(f"FLOPs before pruning: {flops}")

    from paddleslim.dygraph import FPGMFilterPruner
    model.train()
    pruner = FPGMFilterPruner(model, [1, 3, 640, 640])

    eval_class = build_metric(config['Metric'])

    def eval_fn():
        # Score callback handed to the pruner: returns the current hmean.
        hmean = program.eval(model, valid_dataloader, post_process_class,
                             eval_class)['hmean']
        logger.info(f"metric['hmean']: {hmean}")
        return hmean

    skipped_vars = [
        "conv2d_57.w_0", "conv2d_transpose_2.w_0", "conv2d_transpose_3.w_0"
    ]
    params_sensitive = pruner.sensitive(
        eval_func=eval_fn, sen_file="./sen.pickle", skip_vars=skipped_vars)

    logger.info(
        "The sensitivity analysis results of model parameters saved in sen.pickle"
    )

    # Turn the sensitivities into per-parameter pruning ratios.
    # NOTE(review): presumably selects ratios whose measured loss stays within
    # 0.02 -- confirm against the PaddleSlim implementation.
    params_sensitive = pruner._get_ratios_by_loss(params_sensitive, loss=0.02)
    for name, ratio in params_sensitive.items():
        logger.info(f"{name}, {ratio}")

    pruner.prune_vars(params_sensitive, [0])

    flops = paddle.flops(model, [1, 3, 640, 640])
    logger.info(f"FLOPs after pruning: {flops}")

    # Load the trained weights into the pruned structure, then sanity-check.
    init_model(config, model, logger, None)
    metric = program.eval(model, valid_dataloader, post_process_class,
                          eval_class)
    logger.info(f"metric['hmean']: {metric['hmean']}")

    # Export: pick the input spec, convert to a static graph, and save.
    is_rec = arch_config['model_type'] == "rec"
    infer_shape = [3, 32, -1] if is_rec else [3, -1, -1]  # rec needs H == 32
    # NOTE(review): the TPS check is assumed to be nested under the "rec"
    # branch; the indentation was lost in the reviewed text -- confirm.
    if is_rec:
        transform_cfg = (arch_config['Transform']
                         if 'Transform' in arch_config else None)
        if transform_cfg is not None and transform_cfg['name'] == 'TPS':
            logger.info(
                'When there is tps in the network, variable length input is not supported, and the input size needs to be the same as during training'
            )
            infer_shape[-1] = 100

    input_spec = [
        paddle.static.InputSpec(
            shape=[None] + infer_shape, dtype='float32')
    ]
    model = paddle.jit.to_static(model, input_spec=input_spec)

    save_path = '{}/inference'.format(config['Global']['save_inference_dir'])
    paddle.jit.save(model, save_path)
    logger.info('inference model is saved to {}'.format(save_path))
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # program.preprocess returns (config, device, logger, vdl_writer), which
    # is exactly main's positional signature.
    main(*program.preprocess(is_train=True))
|
|
@ -0,0 +1,146 @@
|
|||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
__dir__ = os.path.dirname(__file__)
|
||||
sys.path.append(__dir__)
|
||||
sys.path.append(os.path.join(__dir__, '..', '..', '..'))
|
||||
sys.path.append(os.path.join(__dir__, '..', '..', '..', 'tools'))
|
||||
|
||||
import paddle
|
||||
import paddle.distributed as dist
|
||||
from ppocr.data import build_dataloader
|
||||
from ppocr.modeling.architectures import build_model
|
||||
from ppocr.losses import build_loss
|
||||
from ppocr.optimizer import build_optimizer
|
||||
from ppocr.postprocess import build_post_process
|
||||
from ppocr.metrics import build_metric
|
||||
from ppocr.utils.save_load import init_model
|
||||
import tools.program as program
|
||||
|
||||
dist.get_world_size()
|
||||
|
||||
|
||||
def get_pruned_params(parameters):
    """Return the names of the parameters that are eligible for pruning.

    A parameter qualifies when its shape has four dimensions and its name
    contains none of the excluded substrings ('depthwise', 'transpose',
    'conv2d_57', 'conv2d_56').

    Args:
        parameters: Iterable of objects exposing ``.shape`` and ``.name``.

    Returns:
        list: Names of the qualifying parameters, in input order.
    """
    excluded_tags = ('depthwise', 'transpose', "conv2d_57", "conv2d_56")
    return [
        candidate.name for candidate in parameters
        if len(candidate.shape) == 4 and not any(
            tag in candidate.name for tag in excluded_tags)
    ]
|
||||
|
||||
|
||||
def main(config, device, logger, vdl_writer):
    """Run sensitivity analysis on a PaddleOCR model, prune it, and fine-tune it.

    The steps are: build the dataloaders, post-processor, model, loss,
    optimizer and metric; load the pretrained weights; run (or reload from
    ``./sen.pickle``) the per-parameter sensitivity analysis; prune the model
    according to the derived ratios; then hand over to ``program.train``.

    Args:
        config: Parsed configuration mapping (reads the ``Global``, ``Eval``,
            ``PostProcess``, ``Architecture``, ``Loss``, ``Optimizer`` and
            ``Metric`` sections).
        device: Device handle forwarded to the dataloaders and the trainer.
        logger: Logger used for progress and result messages.
        vdl_writer: Visualization writer forwarded to ``program.train``.

    Raises:
        ValueError: If the ``Eval`` section is empty. The sensitivity analysis
            scores the model on the evaluation set, so it cannot run without
            one (previously this surfaced later as a ``TypeError`` from
            ``len(None)``).
    """
    # init dist environment
    if config['Global']['distributed']:
        dist.init_parallel_env()

    global_config = config['Global']

    # build dataloader
    train_dataloader = build_dataloader(config, 'Train', device, logger)
    if not config['Eval']:
        # Fail fast: eval_fn below needs an evaluation dataloader.
        raise ValueError(
            "Sensitivity analysis requires a non-empty 'Eval' section in the config"
        )
    valid_dataloader = build_dataloader(config, 'Eval', device, logger)

    # build post process
    post_process_class = build_post_process(config['PostProcess'],
                                            global_config)

    # build model
    # for rec algorithm: the head width follows the character set size
    if hasattr(post_process_class, 'character'):
        char_num = len(getattr(post_process_class, 'character'))
        config['Architecture']["Head"]['out_channels'] = char_num
    model = build_model(config['Architecture'])

    flops = paddle.flops(model, [1, 3, 640, 640])
    logger.info(f"FLOPs before pruning: {flops}")

    from paddleslim.dygraph import FPGMFilterPruner
    model.train()
    pruner = FPGMFilterPruner(model, [1, 3, 640, 640])

    # build loss
    loss_class = build_loss(config['Loss'])

    # build optim
    optimizer, lr_scheduler = build_optimizer(
        config['Optimizer'],
        epochs=config['Global']['epoch_num'],
        step_each_epoch=len(train_dataloader),
        parameters=model.parameters())

    # build metric (built once; the original built it a second time below)
    eval_class = build_metric(config['Metric'])
    # load pretrain model
    pre_best_model_dict = init_model(config, model, logger, optimizer)

    logger.info('train dataloader has {} iters, valid dataloader has {} iters'.
                format(len(train_dataloader), len(valid_dataloader)))

    def eval_fn():
        # Score callback handed to the pruner: returns the current hmean.
        metric = program.eval(model, valid_dataloader, post_process_class,
                              eval_class)
        logger.info(f"metric['hmean']: {metric['hmean']}")
        return metric['hmean']

    params_sensitive = pruner.sensitive(
        eval_func=eval_fn,
        sen_file="./sen.pickle",
        skip_vars=[
            "conv2d_57.w_0", "conv2d_transpose_2.w_0", "conv2d_transpose_3.w_0"
        ])

    logger.info(
        "The sensitivity analysis results of model parameters saved in sen.pickle"
    )
    # calculate pruned params's ratio
    # NOTE(review): presumably selects ratios whose measured loss stays within
    # 0.02 -- confirm against the PaddleSlim implementation.
    params_sensitive = pruner._get_ratios_by_loss(params_sensitive, loss=0.02)
    for key, ratio in params_sensitive.items():
        logger.info(f"{key}, {ratio}")

    pruner.prune_vars(params_sensitive, [0])

    # Log the shapes of the convolution weights after pruning.
    for param in model.parameters():
        if ("weights" in param.name and "conv" in param.name) or (
                "w_0" in param.name and "conv2d" in param.name):
            logger.info(f"{param.name}: {param.shape}")

    flops = paddle.flops(model, [1, 3, 640, 640])
    logger.info(f"FLOPs after pruning: {flops}")

    # start train
    program.train(config, train_dataloader, valid_dataloader, device, model,
                  loss_class, optimizer, lr_scheduler, post_process_class,
                  eval_class, pre_best_model_dict, logger, vdl_writer)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # program.preprocess returns (config, device, logger, vdl_writer), which
    # is exactly main's positional signature.
    main(*program.preprocess(is_train=True))
|
|
@ -42,7 +42,7 @@ python deploy/slim/quantization/quant.py -c configs/det/det_mv3_db.yml -o Global
|
|||
# 比如下载提供的训练模型
|
||||
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar
|
||||
tar -xf ch_ppocr_mobile_v2.0_det_train.tar
|
||||
python deploy/slim/quantization/quant.py -c configs/det/det_mv3_db.yml -o Global.pretrain_weights=./ch_ppocr_mobile_v2.0_det_train/best_accuracy Global.save_model_dir=./output/quant_model
|
||||
python deploy/slim/quantization/quant.py -c configs/det/det_mv3_db.yml -o Global.pretrain_weights=./ch_ppocr_mobile_v2.0_det_train/best_accuracy Global.save_inference_dir=./output/quant_inference_model
|
||||
|
||||
```
|
||||
如果要训练识别模型的量化,修改配置文件和加载的模型参数即可。
|
||||
|
|
|
@ -58,7 +58,7 @@ python deploy/slim/quantization/quant.py -c configs/det/det_mv3_db.yml -o Global
|
|||
After getting the model after pruning and finetuning, we can export it as inference_model for predictive deployment:
|
||||
|
||||
```bash
|
||||
python deploy/slim/quantization/export_model.py -c configs/det/det_mv3_db.yml -o Global.checkpoints=output/quant_model/best_accuracy Global.save_model_dir=./output/quant_inference_model
|
||||
python deploy/slim/quantization/export_model.py -c configs/det/det_mv3_db.yml -o Global.checkpoints=output/quant_model/best_accuracy Global.save_inference_dir=./output/quant_inference_model
|
||||
```
|
||||
|
||||
### 5. Deploy
|
||||
|
|
|
@ -9,42 +9,38 @@
|
|||
|
||||
## PaddleOCR常见问题汇总(持续更新)
|
||||
|
||||
* [近期更新(2021.1.18)](#近期更新)
|
||||
* [近期更新(2021.2.1)](#近期更新)
|
||||
* [【精选】OCR精选10个问题](#OCR精选10个问题)
|
||||
* [【理论篇】OCR通用32个问题](#OCR通用问题)
|
||||
* [基础知识7题](#基础知识)
|
||||
* [数据集7题](#数据集2)
|
||||
* [模型训练调优18题](#模型训练调优2)
|
||||
* [【实战篇】PaddleOCR实战110个问题](#PaddleOCR实战问题)
|
||||
* [使用咨询36题](#使用咨询)
|
||||
* [数据集17题](#数据集3)
|
||||
* [模型训练调优28题](#模型训练调优3)
|
||||
* [预测部署29题](#预测部署3)
|
||||
* [【实战篇】PaddleOCR实战120个问题](#PaddleOCR实战问题)
|
||||
* [使用咨询38题](#使用咨询)
|
||||
* [数据集18题](#数据集3)
|
||||
* [模型训练调优30题](#模型训练调优3)
|
||||
* [预测部署34题](#预测部署3)
|
||||
|
||||
|
||||
<a name="近期更新"></a>
|
||||
## 近期更新(2021.1.18)
|
||||
## 近期更新(2021.2.1)
|
||||
|
||||
#### Q3.2.18: PaddleOCR动态图版本如何finetune?
|
||||
**A**:finetune需要将配置文件里的 Global.load_static_weights设置为false,如果没有此字段可以手动添加,然后将模型地址放到Global.pretrained_model字段下即可。
|
||||
|
||||
|
||||
#### Q2.3.18: 在PP-OCR系统中,文本检测的骨干网络为什么没有使用SE模块?
|
||||
#### Q3.3.29: 微调v1.1预训练的模型,可以直接用文字垂直排列和上下颠倒的图片吗?还是必须要水平排列的?
|
||||
**A**:1.1和2.0的模型一样,微调时,垂直排列的文字需要逆时针旋转 90° 后加入训练,上下颠倒的需要旋转为水平的。
|
||||
|
||||
**A**:SE模块是MobileNetV3网络一个重要模块,目的是估计特征图每个特征通道重要性,给特征图每个特征分配权重,提高网络的表达能力。但是,对于文本检测,输入网络的分辨率比较大,一般是640\*640,利用SE模块估计特征图每个特征通道重要性比较困难,网络提升能力有限,但是该模块又比较耗时,因此在PP-OCR系统中,文本检测的骨干网络没有使用SE模块。实验也表明,当去掉SE模块,超轻量模型大小可以减小40%,文本检测效果基本不受影响。详细可以参考PP-OCR技术文章,https://arxiv.org/abs/2009.09941.
|
||||
#### Q3.3.30: 模型训练过程中如何得到 best_accuracy 模型?
|
||||
**A**:配置文件里的eval_batch_step字段用来控制多少次iter进行一次eval,在eval完成后会自动生成 best_accuracy 模型,所以如果希望很快就能拿到best_accuracy模型,可以将eval_batch_step改小一点(例如,10)。
|
||||
|
||||
#### Q3.3.27: PaddleOCR关于文本识别模型的训练,支持的数据增强方式有哪些?
|
||||
#### Q3.4.33: 如何多进程运行paddleocr?
|
||||
**A**:实例化多个paddleocr服务,然后将服务注册到注册中心,之后通过注册中心统一调度即可,关于注册中心,可以搜索eureka了解一下具体使用,其他的注册中心也行。
|
||||
|
||||
**A**:文本识别支持的数据增强方式有随机小幅度裁剪、图像平衡、添加白噪声、颜色漂移、图像反色和Text Image Augmentation(TIA)变换等。可以参考[代码](../../ppocr/data/imaug/rec_img_aug.py)中的warp函数。
|
||||
|
||||
#### Q3.3.28: 关于dygraph分支中,文本识别模型训练,要使用数据增强应该如何设置?
|
||||
|
||||
**A**:可以参考[配置文件](../../configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml)在Train['dataset']['transforms']添加RecAug字段,使数据增强生效。可以通过添加对aug_prob设置,表示每种数据增强采用的概率。aug_prob默认是0.4.由于tia数据增强特殊性,默认不采用,可以通过添加use_tia设置,使tia数据增强生效。详细设置可以参考[ISSUE 1744](https://github.com/PaddlePaddle/PaddleOCR/issues/1744)。
|
||||
|
||||
#### Q3.4.28: PP-OCR系统中,文本检测的结果有置信度吗?
|
||||
|
||||
**A**:文本检测的结果有置信度,由于推理过程中没有使用,所以没有显示的返回到最终结果中。如果需要文本检测结果的置信度,可以在[文本检测DB的后处理代码](../../ppocr/postprocess/db_postprocess.py)的155行,添加scores信息。这样,在[检测预测代码](../../tools/infer/predict_det.py)的197行,就可以拿到文本检测的scores信息。
|
||||
|
||||
#### Q3.4.29: DB文本检测,特征提取网络金字塔构建的部分代码在哪儿?
|
||||
|
||||
**A**:特征提取网络金字塔构建的部分:[代码位置](../../ppocr/modeling/necks/db_fpn.py)。ppocr/modeling文件夹里面是组网相关的代码,其中architectures是文本检测或者文本识别整体流程代码;backbones是骨干网络相关代码;necks是类似与FPN的颈函数代码;heads是提取文本检测或者文本识别预测结果相关的头函数;transforms是类似于TPS特征预处理模块。更多的信息可以参考[代码组织结构](./tree.md)。
|
||||
#### Q3.4.34: 2.0训练出来的模型,能否在1.1版本上进行部署?
|
||||
**A**:这个是不建议的,2.0训练出来的模型建议使用dygraph分支里提供的部署代码。
|
||||
|
||||
<a name="OCR精选10个问题"></a>
|
||||
## 【精选】OCR精选10个问题
|
||||
|
@ -365,13 +361,13 @@
|
|||
(2)inference模型下载时,如果没有安装wget,可直接点击模型链接或将链接地址复制到浏览器进行下载,并解压放置到相应目录。
|
||||
|
||||
#### Q3.1.17:PaddleOCR开源的超轻量模型和通用OCR模型的区别?
|
||||
**A**:目前PaddleOCR开源了2个中文模型,分别是8.6M超轻量中文模型和通用中文OCR模型。两者对比信息如下:
|
||||
**A**:目前PaddleOCR开源了2个中文模型,分别是9.4M超轻量中文模型和通用中文OCR模型。两者对比信息如下:
|
||||
- 相同点:两者使用相同的**算法**和**训练数据**;
|
||||
- 不同点:不同之处在于**骨干网络**和**通道参数**,超轻量模型使用MobileNetV3作为骨干网络,通用模型使用Resnet50_vd作为检测模型backbone,Resnet34_vd作为识别模型backbone,具体参数差异可对比两种模型训练的配置文件.
|
||||
|
||||
|模型|骨干网络|检测训练配置|识别训练配置|
|
||||
|-|-|-|-|
|
||||
|8.6M超轻量中文OCR模型|MobileNetV3+MobileNetV3|det_mv3_db.yml|rec_chinese_lite_train.yml|
|
||||
|9.4M超轻量中文OCR模型|MobileNetV3+MobileNetV3|det_mv3_db.yml|rec_chinese_lite_train.yml|
|
||||
|通用中文OCR模型|Resnet50_vd+Resnet34_vd|det_r50_vd_db.yml|rec_chinese_common_train.yml|
|
||||
|
||||
#### Q3.1.18:如何加入自己的检测算法?
|
||||
|
@ -396,13 +392,13 @@
|
|||
**A**:动态图版本正在紧锣密鼓开发中,将于2020年12月16日发布,敬请关注。
|
||||
|
||||
#### Q3.1.22:ModuleNotFoundError: No module named 'paddle.nn',
|
||||
**A**:paddle.nn是Paddle2.0版本特有的功能,请安装大于等于Paddle 2.0.0rc1的版本,安装方式为
|
||||
**A**:paddle.nn是Paddle2.0版本特有的功能,请安装大于等于Paddle 2.0.0的版本,安装方式为
|
||||
```
|
||||
python3 -m pip install paddlepaddle-gpu==2.0.0rc1 -i https://mirror.baidu.com/pypi/simple
|
||||
python3 -m pip install paddlepaddle-gpu==2.0.0 -i https://mirror.baidu.com/pypi/simple
|
||||
```
|
||||
|
||||
#### Q3.1.23: ImportError: /usr/lib/x86_64_linux-gnu/libstdc++.so.6:version `CXXABI_1.3.11` not found (required by /usr/lib/python3.6/site-package/paddle/fluid/core+avx.so)
|
||||
**A**:这个问题是glibc版本不足导致的,Paddle2.0rc1版本对gcc版本和glib版本有更高的要求,推荐gcc版本为8.2,glibc版本2.12以上。
|
||||
**A**:这个问题是glibc版本不足导致的,Paddle2.0.0版本对gcc版本和glibc版本有更高的要求,推荐gcc版本为8.2,glibc版本2.12以上。
|
||||
如果您的环境不满足这个要求,或者使用的docker镜像为:
|
||||
`hub.baidubce.com/paddlepaddle/paddle:latest-gpu-cuda9.0-cudnn7-dev`
|
||||
`hub.baidubce.com/paddlepaddle/paddle:latest-gpu-cuda9.0-cudnn7-dev`,安装Paddle2.0rc版本可能会出现上述错误,2.0版本推荐使用新的docker镜像 `paddlepaddle/paddle:latest-dev-cuda10.1-cudnn7-gcc82`。
|
||||
|
@ -414,7 +410,7 @@ python3 -m pip install paddlepaddle-gpu==2.0.0rc1 -i https://mirror.baidu.com/py
|
|||
|
||||
- develop:基于Paddle静态图开发的分支,推荐使用paddle1.8 或者2.0版本,该分支具备完善的模型训练、预测、推理部署、量化裁剪等功能,领先于release/1.1分支。
|
||||
- release/1.1:PaddleOCR 发布的第一个稳定版本,基于静态图开发,具备完善的训练、预测、推理部署、量化裁剪等功能。
|
||||
- dygraph:基于Paddle动态图开发的分支,目前仍在开发中,未来将作为主要开发分支,运行要求使用Paddle2.0rc1版本,目前仍在开发中。
|
||||
- dygraph:基于Paddle动态图开发的分支,目前仍在开发中,未来将作为主要开发分支,运行要求使用Paddle2.0.0版本。
|
||||
- release/2.0-rc1-0:PaddleOCR发布的第二个稳定版本,基于动态图和paddle2.0版本开发,动态图开发的工程更易于调试,目前支持模型训练、预测,暂不支持移动端部署。
|
||||
|
||||
如果您已经上手过PaddleOCR,并且希望在各种环境上部署PaddleOCR,目前建议使用静态图分支,develop或者release/1.1分支。如果您是初学者,想快速训练,调试PaddleOCR中的算法,建议尝鲜PaddleOCR dygraph分支。
|
||||
|
@ -431,7 +427,7 @@ python3 -m pip install paddlepaddle-gpu==2.0.0rc1 -i https://mirror.baidu.com/py
|
|||
|
||||
#### Q3.1.27: 如何可视化acc,loss曲线图,模型网络结构图等?
|
||||
|
||||
**A**:在配置文件里有`use_visualdl`的参数,设置为True即可,更多的使用命令可以参考:[VisualDL使用指南](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc1/guides/03_VisualDL/visualdl.html)。
|
||||
**A**:在配置文件里有`use_visualdl`的参数,设置为True即可,更多的使用命令可以参考:[VisualDL使用指南](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/guides/03_VisualDL/visualdl.html)。
|
||||
|
||||
#### Q3.1.28: 在使用StyleText数据合成工具的时候,报错`ModuleNotFoundError: No module named 'utils.config'`,这是为什么呢?
|
||||
|
||||
|
@ -450,7 +446,7 @@ https://github.com/PaddlePaddle/PaddleOCR/blob/de3e2e7cd3b8b65ee02d7a41e570fa5b5
|
|||
|
||||
#### Q3.1.31: 怎么输出网络结构以及每层的参数信息?
|
||||
|
||||
**A**:可以使用 `paddle.summary`, 具体参考:https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc1/api/paddle/hapi/model_summary/summary_cn.html#summary。
|
||||
**A**:可以使用 `paddle.summary`, 具体参考:https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/api/paddle/hapi/model_summary/summary_cn.html。
|
||||
|
||||
#### Q3.1.32 能否修改StyleText配置文件中的分辨率?
|
||||
|
||||
|
@ -474,9 +470,18 @@ StyleText的用途主要是:提取style_image中的字体、背景等style信
|
|||
例如识别身份证照片,可以先匹配"姓名","性别"等关键字,根据这些关键字的坐标去推测其他信息的位置,再与识别的结果匹配。
|
||||
|
||||
#### Q3.1.36 如何识别竹简上的古文?
|
||||
|
||||
**A**:对于字符都是普通的汉字字符的情况,只要标注足够的数据,finetune模型就可以了。如果数据量不足,您可以尝试StyleText工具。
|
||||
而如果使用的字符是特殊的古文字、甲骨文、象形文字等,那么首先需要构建一个古文字的字典,之后再进行训练。
|
||||
|
||||
#### Q3.1.37: 小语种模型只有识别模型,没有检测模型吗?
|
||||
|
||||
**A**:小语种(包括纯英文数字)的检测模型和中文的检测模型是共用的,在训练中文检测模型时加入了多语言数据。https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/doc/doc_en/models_list_en.md#1-text-detection-model。
|
||||
|
||||
#### Q3.1.38: module 'paddle.distributed' has no attribute ‘get_rank’。
|
||||
|
||||
**A**:Paddle版本问题,请安装2.0版本Paddle:pip install paddlepaddle==2.0.0。
|
||||
|
||||
<a name="数据集3"></a>
|
||||
### 数据集
|
||||
|
||||
|
@ -568,6 +573,9 @@ StyleText的用途主要是:提取style_image中的字体、背景等style信
|
|||
|
||||
**A**:PPOCRLabel可运行于Linux、Windows、MacOS等多种系统。操作步骤可以参考文档,https://github.com/PaddlePaddle/PaddleOCR/blob/develop/PPOCRLabel/README.md
|
||||
|
||||
#### Q3.2.18: PaddleOCR动态图版本如何finetune?
|
||||
**A**:finetune需要将配置文件里的 Global.load_static_weights设置为false,如果没有此字段可以手动添加,然后将模型地址放到Global.pretrained_model字段下即可。
|
||||
|
||||
<a name="模型训练调优3"></a>
|
||||
|
||||
### 模型训练调优
|
||||
|
@ -713,6 +721,12 @@ ps -axu | grep train.py | awk '{print $2}' | xargs kill -9
|
|||
|
||||
**A**:可以参考[配置文件](../../configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml)在Train['dataset']['transforms']添加RecAug字段,使数据增强生效。可以通过添加对aug_prob设置,表示每种数据增强采用的概率。aug_prob默认是0.4。由于tia数据增强的特殊性,默认不采用,可以通过添加use_tia设置,使tia数据增强生效。详细设置可以参考[ISSUE 1744](https://github.com/PaddlePaddle/PaddleOCR/issues/1744)。
|
||||
|
||||
#### Q3.3.29: 微调v1.1预训练的模型,可以直接用文字垂直排列和上下颠倒的图片吗?还是必须要水平排列的?
|
||||
**A**:1.1和2.0的模型一样,微调时,垂直排列的文字需要逆时针旋转 90°后加入训练,上下颠倒的需要旋转为水平的。
|
||||
|
||||
#### Q3.3.30: 模型训练过程中如何得到 best_accuracy 模型?
|
||||
**A**:配置文件里的eval_batch_step字段用来控制多少次iter进行一次eval,在eval完成后会自动生成 best_accuracy 模型,所以如果希望很快就能拿到best_accuracy模型,可以将eval_batch_step改小一点,如改为[10,10],这样表示第10次迭代后,以后每隔10个迭代就进行一次模型的评估。
|
||||
|
||||
<a name="预测部署3"></a>
|
||||
|
||||
### 预测部署
|
||||
|
@ -854,3 +868,24 @@ img = cv.imdecode(img_array, -1)
|
|||
#### Q3.4.29: DB文本检测,特征提取网络金字塔构建的部分代码在哪儿?
|
||||
|
||||
**A**:特征提取网络金字塔构建的部分:[代码位置](../../ppocr/modeling/necks/db_fpn.py)。ppocr/modeling文件夹里面是组网相关的代码,其中architectures是文本检测或者文本识别整体流程代码;backbones是骨干网络相关代码;necks是类似于FPN的颈函数代码;heads是提取文本检测或者文本识别预测结果相关的头函数;transforms是类似于TPS特征预处理模块。更多的信息可以参考[代码组织结构](./tree.md)。
|
||||
|
||||
#### Q3.4.30: PaddleOCR是否支持在华为鲲鹏920CPU上部署?
|
||||
|
||||
**A**:目前Paddle的预测库是支持华为鲲鹏920CPU的,但是OCR还没在这些芯片上测试过,可以自己调试,有问题反馈给我们。
|
||||
|
||||
#### Q3.4.31: 采用Paddle-Lite进行端侧部署,出现问题,环境没问题。
|
||||
|
||||
**A**:如果你的预测库是自己编译的,那么你的nb文件也要自己编译,用同一个lite版本。不能直接用下载的nb文件,因为版本不同。
|
||||
|
||||
#### Q3.4.32: PaddleOCR的模型支持onnx转换吗?
|
||||
|
||||
**A**:我们目前已经通过Paddle2ONNX来支持各模型套件的转换,PaddleOCR基于PaddlePaddle 2.0的版本(dygraph分支)已经支持导出为ONNX,欢迎关注Paddle2ONNX,了解更多项目的进展:
|
||||
Paddle2ONNX项目:https://github.com/PaddlePaddle/Paddle2ONNX
|
||||
Paddle2ONNX支持转换的[模型列表](https://github.com/PaddlePaddle/Paddle2ONNX/blob/develop/docs/zh/model_zoo.md#%E5%9B%BE%E5%83%8Focr)
|
||||
|
||||
|
||||
#### Q3.4.33: 如何多进程运行paddleocr?
|
||||
**A**:实例化多个paddleocr服务,然后将服务注册到注册中心,之后通过注册中心统一调度即可,关于注册中心,可以搜索eureka了解一下具体使用,其他的注册中心也行。
|
||||
|
||||
#### Q3.4.34: 2.0训练出来的模型,能否在1.1版本上进行部署?
|
||||
**A**:这个是不建议的,2.0训练出来的模型建议使用dygraph分支里提供的部署代码。
|
||||
|
|
|
@ -40,8 +40,8 @@ PaddleOCR基于动态图开源的文本识别算法列表:
|
|||
- [x] CRNN([paper](https://arxiv.org/abs/1507.05717))[7](ppocr推荐)
|
||||
- [x] Rosetta([paper](https://arxiv.org/abs/1910.05085))[10]
|
||||
- [x] STAR-Net([paper](http://www.bmva.org/bmvc/2016/papers/paper043/index.html))[11]
|
||||
- [ ] RARE([paper](https://arxiv.org/abs/1603.03915v1))[12] coming soon
|
||||
- [ ] SRN([paper](https://arxiv.org/abs/2003.12294))[5] coming soon
|
||||
- [x] RARE([paper](https://arxiv.org/abs/1603.03915v1))[12]
|
||||
- [x] SRN([paper](https://arxiv.org/abs/2003.12294))[5]
|
||||
|
||||
参考[DTRB][3](https://arxiv.org/abs/1904.01906)文字识别训练和评估流程,使用MJSynth和SynthText两个文字识别数据集训练,在IIIT, SVT, IC03, IC13, IC15, SVTP, CUTE数据集上进行评估,算法效果如下:
|
||||
|
||||
|
@ -53,5 +53,9 @@ PaddleOCR基于动态图开源的文本识别算法列表:
|
|||
|CRNN|MobileNetV3|79.97%|rec_mv3_none_bilstm_ctc|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_none_bilstm_ctc_v2.0_train.tar)|
|
||||
|StarNet|Resnet34_vd|84.44%|rec_r34_vd_tps_bilstm_ctc|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_tps_bilstm_ctc_v2.0_train.tar)|
|
||||
|StarNet|MobileNetV3|81.42%|rec_mv3_tps_bilstm_ctc|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_tps_bilstm_ctc_v2.0_train.tar)|
|
||||
|RARE|MobileNetV3|82.5%|rec_mv3_tps_bilstm_att |[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_tps_bilstm_att_v2.0_train.tar)|
|
||||
|RARE|Resnet34_vd|83.6%|rec_r34_vd_tps_bilstm_att |[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_tps_bilstm_att_v2.0_train.tar)|
|
||||
|SRN|Resnet50_vd_fpn| 88.52% | rec_r50fpn_vd_none_srn | [下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r50_vd_srn_train.tar) |
|
||||
|
||||
|
||||
PaddleOCR文本识别算法的训练和使用请参考文档教程中[模型训练/评估中的文本识别部分](./recognition.md)。
|
||||
|
|
|
@ -1,4 +1,12 @@
|
|||
## 文字角度分类
|
||||
### 方法介绍
|
||||
文字角度分类主要用于图片非0度的场景下,在这种场景下需要对图片里检测到的文本行进行一个转正的操作。在PaddleOCR系统内,
|
||||
文字检测之后得到的文本行图片经过仿射变换之后送入识别模型,此时只需要对文字进行一个0和180度的角度分类,因此PaddleOCR内置的
|
||||
文字角度分类器**只支持了0和180度的分类**。如果想支持更多角度,可以自己修改算法进行支持。
|
||||
|
||||
0和180度数据样本例子:
|
||||
|
||||
![](../imgs_results/angle_class_example.jpg)
|
||||
|
||||
### 数据准备
|
||||
|
||||
|
@ -13,7 +21,7 @@ ln -sf <path/to/dataset> <path/to/paddle_ocr>/train_data/cls/dataset
|
|||
请参考下文组织您的数据。
|
||||
- 训练集
|
||||
|
||||
首先请将训练图片放入同一个文件夹(train_images),并用一个txt文件(cls_gt_train.txt)记录图片路径和标签。
|
||||
首先建议将训练图片放入同一个文件夹,并用一个txt文件(cls_gt_train.txt)记录图片路径和标签。
|
||||
|
||||
**注意:** 默认请将图片路径和图片标签用 `\t` 分割,如用其他方式分割将造成训练报错
|
||||
|
||||
|
@ -21,8 +29,8 @@ ln -sf <path/to/dataset> <path/to/paddle_ocr>/train_data/cls/dataset
|
|||
|
||||
```
|
||||
" 图像文件名 图像标注信息 "
|
||||
train/word_001.jpg 0
|
||||
train/word_002.jpg 180
|
||||
train/cls/train/word_001.jpg 0
|
||||
train/cls/train/word_002.jpg 180
|
||||
```
|
||||
|
||||
最终训练集应有如下文件结构:
|
||||
|
@ -63,7 +71,7 @@ PaddleOCR提供了训练脚本、评估脚本和预测脚本。
|
|||
*如果您安装的是cpu版本,请将配置文件中的 `use_gpu` 字段修改为false*
|
||||
|
||||
```
|
||||
# GPU训练 支持单卡,多卡训练,通过 '--gpus' 指定卡号,如果使用的paddle版本小于2.0rc1,请使用'--select_gpus'参数选择要使用的GPU
|
||||
# GPU训练 支持单卡,多卡训练,通过 '--gpus' 指定卡号。
|
||||
# 启动训练,下面的命令已经写入train.sh文件中,只需修改文件里的配置文件路径即可
|
||||
python3 -m paddle.distributed.launch --gpus '0,1,2,3,4,5,6,7' tools/train.py -c configs/cls/cls_mv3.yml
|
||||
```
|
||||
|
|
|
@ -76,7 +76,7 @@ tar -xf ./pretrain_models/MobileNetV3_large_x0_5_pretrained.tar ./pretrain_model
|
|||
# 单机单卡训练 mv3_db 模型
|
||||
python3 tools/train.py -c configs/det/det_mv3_db.yml \
|
||||
-o Global.pretrain_weights=./pretrain_models/MobileNetV3_large_x0_5_pretrained/
|
||||
# 单机多卡训练,通过 --gpus 参数设置使用的GPU ID;如果使用的paddle版本小于2.0rc1,请使用'--select_gpus'参数选择要使用的GPU
|
||||
# 单机多卡训练,通过 --gpus 参数设置使用的GPU ID
|
||||
python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs/det/det_mv3_db.yml \
|
||||
-o Global.pretrain_weights=./pretrain_models/MobileNetV3_large_x0_5_pretrained/
|
||||
```
|
||||
|
|
|
@ -2,10 +2,11 @@
|
|||
# 基于Python预测引擎推理
|
||||
|
||||
inference 模型(`paddle.jit.save`保存的模型)
|
||||
一般是模型训练完成后保存的固化模型,多用于预测部署。训练过程中保存的模型是checkpoints模型,保存的是模型的参数,多用于恢复训练等。
|
||||
与checkpoints模型相比,inference 模型会额外保存模型的结构信息,在预测部署、加速推理上性能优越,灵活方便,适合与实际系统集成。
|
||||
一般是模型训练完成后,把模型结构和模型参数保存在文件中的固化模型,多用于预测部署场景。
|
||||
训练过程中保存的模型是checkpoints模型,保存的只有模型的参数,多用于恢复训练等。
|
||||
与checkpoints模型相比,inference 模型会额外保存模型的结构信息,在预测部署、加速推理上性能优越,灵活方便,适合于实际系统集成。
|
||||
|
||||
接下来首先介绍如何将训练的模型转换成inference模型,然后将依次介绍文本检测、文本角度分类器、文本识别以及三者串联基于预测引擎推理。
|
||||
接下来首先介绍如何将训练的模型转换成inference模型,然后将依次介绍文本检测、文本角度分类器、文本识别以及三者串联在CPU、GPU上的预测方法。
|
||||
|
||||
|
||||
- [一、训练模型转inference模型](#训练模型转inference模型)
|
||||
|
@ -22,8 +23,9 @@ inference 模型(`paddle.jit.save`保存的模型)
|
|||
- [三、文本识别模型推理](#文本识别模型推理)
|
||||
- [1. 超轻量中文识别模型推理](#超轻量中文识别模型推理)
|
||||
- [2. 基于CTC损失的识别模型推理](#基于CTC损失的识别模型推理)
|
||||
- [3. 自定义文本识别字典的推理](#自定义文本识别字典的推理)
|
||||
- [4. 多语言模型的推理](#多语言模型的推理)
|
||||
- [3. 基于SRN损失的识别模型推理](#基于SRN损失的识别模型推理)
|
||||
- [4. 自定义文本识别字典的推理](#自定义文本识别字典的推理)
|
||||
- [5. 多语言模型的推理](#多语言模型的推理)
|
||||
|
||||
- [四、方向分类模型推理](#方向识别模型推理)
|
||||
- [1. 方向分类模型推理](#方向分类模型推理)
|
||||
|
@ -139,7 +141,7 @@ python3 tools/infer/predict_det.py --image_dir="./doc/imgs/00018069.jpg" --det_m
|
|||
![](../imgs_results/det_res_00018069.jpg)
|
||||
|
||||
通过参数`limit_type`和`det_limit_side_len`来对图片的尺寸进行限制,
|
||||
`litmit_type`可选参数为[`max`, `min`],
|
||||
`limit_type`可选参数为[`max`, `min`],
|
||||
`det_limit_side_len` 为正整数,一般设置为32 的倍数,比如960。
|
||||
|
||||
参数默认设置为`limit_type='max', det_limit_side_len=960`。表示网络输入图像的最长边不能超过960,
|
||||
|
@ -295,8 +297,20 @@ Predicts of ./doc/imgs_words_en/word_336.png:('super', 0.9999073)
|
|||
self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
|
||||
dict_character = list(self.character_str)
|
||||
```
|
||||
<a name="基于SRN损失的识别模型推理"></a>
|
||||
### 3. 基于SRN损失的识别模型推理
|
||||
基于SRN损失的识别模型,需要额外设置识别算法参数 --rec_algorithm="SRN"。
|
||||
同时需要保证预测shape与训练时一致,如: --rec_image_shape="1, 64, 256"
|
||||
|
||||
### 3. 自定义文本识别字典的推理
|
||||
```
|
||||
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" \
|
||||
--rec_model_dir="./inference/srn/" \
|
||||
--rec_image_shape="1, 64, 256" \
|
||||
--rec_char_type="en" \
|
||||
--rec_algorithm="SRN"
|
||||
```
|
||||
|
||||
### 4. 自定义文本识别字典的推理
|
||||
如果训练时修改了文本的字典,在使用inference模型预测时,需要通过`--rec_char_dict_path`指定使用的字典路径,并且设置 `rec_char_type=ch`
|
||||
|
||||
```
|
||||
|
@ -304,12 +318,12 @@ python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png
|
|||
```
|
||||
|
||||
<a name="多语言模型的推理"></a>
|
||||
### 4. 多语言模型的推理
|
||||
### 5. 多语言模型的推理
|
||||
如果您需要预测的是其他语言模型,在使用inference模型预测时,需要通过`--rec_char_dict_path`指定使用的字典路径, 同时为了得到正确的可视化结果,
|
||||
需要通过 `--vis_font_path` 指定可视化的字体路径,`doc/` 路径下有默认提供的小语种字体,例如韩文识别:
|
||||
需要通过 `--vis_font_path` 指定可视化的字体路径,`doc/fonts/` 路径下有默认提供的小语种字体,例如韩文识别:
|
||||
|
||||
```
|
||||
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/korean/1.jpg" --rec_model_dir="./your inference model" --rec_char_type="korean" --rec_char_dict_path="ppocr/utils/dict/korean_dict.txt" --vis_font_path="doc/korean.ttf"
|
||||
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/korean/1.jpg" --rec_model_dir="./your inference model" --rec_char_type="korean" --rec_char_dict_path="ppocr/utils/dict/korean_dict.txt" --vis_font_path="doc/fonts/korean.ttf"
|
||||
```
|
||||
![](../imgs_words/korean/1.jpg)
|
||||
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
经测试PaddleOCR可在glibc 2.23上运行,您也可以测试其他glibc版本或安装glibc 2.23
|
||||
PaddleOCR 工作环境
|
||||
- PaddlePaddle 1.8+ ,推荐使用 PaddlePaddle 2.0rc1
|
||||
- PaddlePaddle 2.0.0
|
||||
- python3.7
|
||||
- glibc 2.23
|
||||
- cuDNN 7.6+ (GPU)
|
||||
|
@ -30,16 +30,16 @@ sudo nvidia-docker run --name ppocr -v $PWD:/paddle --shm-size=64G --network=hos
|
|||
sudo docker container exec -it ppocr /bin/bash
|
||||
```
|
||||
|
||||
**2. 安装PaddlePaddle Fluid v2.0**
|
||||
**2. 安装PaddlePaddle 2.0**
|
||||
```
|
||||
pip3 install --upgrade pip
|
||||
|
||||
如果您的机器安装的是CUDA9或CUDA10,请运行以下命令安装
|
||||
python3 -m pip install paddlepaddle-gpu==2.0.0rc1 -i https://mirror.baidu.com/pypi/simple
|
||||
python3 -m pip install paddlepaddle-gpu==2.0.0 -i https://mirror.baidu.com/pypi/simple
|
||||
|
||||
如果您的机器是CPU,请运行以下命令安装
|
||||
|
||||
python3 -m pip install paddlepaddle==2.0.0rc1 -i https://mirror.baidu.com/pypi/simple
|
||||
python3 -m pip install paddlepaddle==2.0.0 -i https://mirror.baidu.com/pypi/simple
|
||||
|
||||
更多的版本需求,请参照[安装文档](https://www.paddlepaddle.org.cn/install/quick)中的说明进行操作。
|
||||
```
|
||||
|
|
|
@ -1,5 +1,9 @@
|
|||
## OCR模型列表(V2.0,2021年1月20日更新)
|
||||
**说明** :2.0版模型和[1.1版模型](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_ch/models_list.md)的主要区别在于动态图训练vs.静态图训练,模型性能上无明显差距。
|
||||
|
||||
> **说明**
|
||||
> 1. 2.0版模型和[1.1版模型](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_ch/models_list.md)的主要区别在于动态图训练vs.静态图训练,模型性能上无明显差距。
|
||||
> 2. 本文档提供的是PPOCR自研模型列表,更多基于公开数据集的算法介绍与预训练模型可以参考:[算法概览文档](./algorithm_overview.md)。
|
||||
|
||||
|
||||
- [一、文本检测模型](#文本检测模型)
|
||||
- [二、文本识别模型](#文本识别模型)
|
||||
|
@ -12,9 +16,14 @@ PaddleOCR提供的可下载模型包括`推理模型`、`训练模型`、`预训
|
|||
|
||||
|模型类型|模型格式|简介|
|
||||
|--- | --- | --- |
|
||||
|推理模型|inference.pdmodel、inference.pdiparams|用于python预测引擎推理,[详情](./inference.md)|
|
||||
|推理模型|inference.pdmodel、inference.pdiparams|用于预测引擎推理,[详情](./inference.md)|
|
||||
|训练模型、预训练模型|\*.pdparams、\*.pdopt、\*.states |训练过程中保存的模型的参数、优化器状态和训练中间信息,多用于模型指标评估和恢复训练|
|
||||
|slim模型|\*.nb|用于lite部署|
|
||||
|slim模型|\*.nb|经过飞桨模型压缩工具PaddleSlim压缩后的模型,适用于移动端/IoT端等端侧部署场景(需使用飞桨Paddle Lite部署)。|
|
||||
|
||||
|
||||
各个模型的关系如下面的示意图所示。
|
||||
|
||||
![](../imgs/model_prod_flow_ch.png)
|
||||
|
||||
|
||||
<a name="文本检测模型"></a>
|
||||
|
|
|
@ -1,60 +1,94 @@
|
|||
## 文字识别
|
||||
|
||||
|
||||
- [一、数据准备](#数据准备)
|
||||
- [数据下载](#数据下载)
|
||||
- [自定义数据集](#自定义数据集)
|
||||
- [字典](#字典)
|
||||
- [支持空格](#支持空格)
|
||||
- [1 数据准备](#数据准备)
|
||||
- [1.1 自定义数据集](#自定义数据集)
|
||||
- [1.2 数据下载](#数据下载)
|
||||
- [1.3 字典](#字典)
|
||||
- [1.4 支持空格](#支持空格)
|
||||
|
||||
- [二、启动训练](#启动训练)
|
||||
- [1. 数据增强](#数据增强)
|
||||
- [2. 训练](#训练)
|
||||
- [3. 小语种](#小语种)
|
||||
- [2 启动训练](#启动训练)
|
||||
- [2.1 数据增强](#数据增强)
|
||||
- [2.2 训练](#训练)
|
||||
- [2.3 小语种](#小语种)
|
||||
|
||||
- [三、评估](#评估)
|
||||
- [3 评估](#评估)
|
||||
|
||||
- [四、预测](#预测)
|
||||
- [1. 训练引擎预测](#训练引擎预测)
|
||||
- [4 预测](#预测)
|
||||
- [4.1 训练引擎预测](#训练引擎预测)
|
||||
|
||||
|
||||
<a name="数据准备"></a>
|
||||
### 数据准备
|
||||
### 1. 数据准备
|
||||
|
||||
|
||||
PaddleOCR 支持两种数据格式: `lmdb` 用于训练公开数据,调试算法; `通用数据` 训练自己的数据:
|
||||
|
||||
请按如下步骤设置数据集:
|
||||
PaddleOCR 支持两种数据格式:
|
||||
- `lmdb` 用于训练以lmdb格式存储的数据集;
|
||||
- `通用数据` 用于训练以文本文件存储的数据集:
|
||||
|
||||
训练数据的默认存储路径是 `PaddleOCR/train_data`,如果您的磁盘上已有数据集,只需创建软链接至数据集目录:
|
||||
|
||||
```
|
||||
# linux and mac os
|
||||
ln -sf <path/to/dataset> <path/to/paddle_ocr>/train_data/dataset
|
||||
# windows
|
||||
mklink /d <path/to/paddle_ocr>/train_data/dataset <path/to/dataset>
|
||||
```
|
||||
|
||||
<a name="数据下载"></a>
|
||||
* 数据下载
|
||||
<a name="自定义数据集"></a>
|
||||
#### 1.1 自定义数据集
|
||||
下面以通用数据集为例, 介绍如何准备数据集:
|
||||
|
||||
若您本地没有数据集,可以在官网下载 [icdar2015](http://rrc.cvc.uab.es/?ch=4&com=downloads) 数据,用于快速验证。也可以参考[DTRB](https://github.com/clovaai/deep-text-recognition-benchmark#download-lmdb-dataset-for-traininig-and-evaluation-from-here),下载 benchmark 所需的lmdb格式数据集。
|
||||
* 训练集
|
||||
|
||||
<a name="自定义数据集"></a>
|
||||
* 使用自己数据集
|
||||
建议将训练图片放入同一个文件夹,并用一个txt文件(rec_gt_train.txt)记录图片路径和标签,txt文件里的内容如下:
|
||||
|
||||
若您希望使用自己的数据进行训练,请参考下文组织您的数据。
|
||||
|
||||
- 训练集
|
||||
|
||||
首先请将训练图片放入同一个文件夹(train_images),并用一个txt文件(rec_gt_train.txt)记录图片路径和标签。
|
||||
|
||||
**注意:** 默认请将图片路径和图片标签用 \t 分割,如用其他方式分割将造成训练报错
|
||||
**注意:** txt文件中默认请将图片路径和图片标签用 \t 分割,如用其他方式分割将造成训练报错。
|
||||
|
||||
```
|
||||
" 图像文件名 图像标注信息 "
|
||||
|
||||
train_data/train_0001.jpg 简单可依赖
|
||||
train_data/train_0002.jpg 用科技让复杂的世界更简单
|
||||
train_data/rec/train/word_001.jpg 简单可依赖
|
||||
train_data/rec/train/word_002.jpg 用科技让复杂的世界更简单
|
||||
...
|
||||
```
|
||||
PaddleOCR 提供了一份用于训练 icdar2015 数据集的标签文件,通过以下方式下载:
|
||||
|
||||
最终训练集应有如下文件结构:
|
||||
```
|
||||
|-train_data
|
||||
|-rec
|
||||
|- rec_gt_train.txt
|
||||
|- train
|
||||
|- word_001.png
|
||||
|- word_002.jpg
|
||||
|- word_003.jpg
|
||||
| ...
|
||||
```
|
||||
|
||||
- 测试集
|
||||
|
||||
同训练集类似,测试集也需要提供一个包含所有图片的文件夹(test)和一个rec_gt_test.txt,测试集的结构如下所示:
|
||||
|
||||
```
|
||||
|-train_data
|
||||
|-rec
|
||||
|- rec_gt_test.txt
|
||||
|- test
|
||||
|- word_001.jpg
|
||||
|- word_002.jpg
|
||||
|- word_003.jpg
|
||||
| ...
|
||||
```
|
||||
|
||||
<a name="数据下载"></a>
|
||||
|
||||
#### 1.2 数据下载
|
||||
|
||||
若您本地没有数据集,可以在官网下载 [icdar2015](http://rrc.cvc.uab.es/?ch=4&com=downloads) 数据,用于快速验证。也可以参考[DTRB](https://github.com/clovaai/deep-text-recognition-benchmark#download-lmdb-dataset-for-traininig-and-evaluation-from-here) ,下载 benchmark 所需的lmdb格式数据集。
|
||||
|
||||
如果你使用的是icdar2015的公开数据集,PaddleOCR 提供了一份用于训练 icdar2015 数据集的标签文件,通过以下方式下载:
|
||||
|
||||
如果希望复现SRN的论文指标,需要下载离线[增广数据](https://pan.baidu.com/s/1-HSZ-ZVdqBF2HaBZ5pRAKA),提取码: y3ry。增广数据是由MJSynth和SynthText做旋转和扰动得到的。数据下载完成后请解压到 {your_path}/PaddleOCR/train_data/data_lmdb_release/training/ 路径下。
|
||||
|
||||
```
|
||||
# 训练集标签
|
||||
|
@ -70,34 +104,8 @@ PaddleOCR 也提供了数据格式转换脚本,可以将官网 label 转换支
|
|||
python gen_label.py --mode="rec" --input_path="{path/of/origin/label}" --output_label="rec_gt_label.txt"
|
||||
```
|
||||
|
||||
最终训练集应有如下文件结构:
|
||||
```
|
||||
|-train_data
|
||||
|-ic15_data
|
||||
|- rec_gt_train.txt
|
||||
|- train
|
||||
|- word_001.png
|
||||
|- word_002.jpg
|
||||
|- word_003.jpg
|
||||
| ...
|
||||
```
|
||||
|
||||
- 测试集
|
||||
|
||||
同训练集类似,测试集也需要提供一个包含所有图片的文件夹(test)和一个rec_gt_test.txt,测试集的结构如下所示:
|
||||
|
||||
```
|
||||
|-train_data
|
||||
|-ic15_data
|
||||
|- rec_gt_test.txt
|
||||
|- test
|
||||
|- word_001.jpg
|
||||
|- word_002.jpg
|
||||
|- word_003.jpg
|
||||
| ...
|
||||
```
|
||||
<a name="字典"></a>
|
||||
- 字典
|
||||
#### 1.3 字典
|
||||
|
||||
最后需要提供一个字典({word_dict_name}.txt),使模型在训练时,可以将所有出现的字符映射为字典的索引。
|
||||
|
||||
|
@ -114,6 +122,10 @@ n
|
|||
|
||||
word_dict.txt 每行有一个单字,将字符与数字索引映射在一起,“and” 将被映射成 [2 5 1]
|
||||
|
||||
* 内置字典
|
||||
|
||||
PaddleOCR内置了一部分字典,可以按需使用。
|
||||
|
||||
`ppocr/utils/ppocr_keys_v1.txt` 是一个包含6623个字符的中文字典
|
||||
|
||||
`ppocr/utils/ic15_dict.txt` 是一个包含36个字符的英文字典
|
||||
|
@ -129,10 +141,10 @@ word_dict.txt 每行有一个单字,将字符与数字索引映射在一起,
|
|||
`ppocr/utils/dict/en_dict.txt` 是一个包含63个字符的英文字典
|
||||
|
||||
|
||||
您可以按需使用。
|
||||
|
||||
|
||||
目前的多语言模型仍处在demo阶段,会持续优化模型并补充语种,**非常欢迎您为我们提供其他语言的字典和字体**,
|
||||
如您愿意可将字典文件提交至 [dict](../../ppocr/utils/dict) 将语料文件提交至[corpus](../../ppocr/utils/corpus),我们会在Repo中感谢您。
|
||||
如您愿意可将字典文件提交至 [dict](../../ppocr/utils/dict),我们会在Repo中感谢您。
|
||||
|
||||
- 自定义字典
|
||||
|
||||
|
@ -140,13 +152,13 @@ word_dict.txt 每行有一个单字,将字符与数字索引映射在一起,
|
|||
并将 `character_type` 设置为 `ch`。
|
||||
|
||||
<a name="支持空格"></a>
|
||||
- 添加空格类别
|
||||
#### 1.4 添加空格类别
|
||||
|
||||
如果希望支持识别"空格"类别, 请将yml文件中的 `use_space_char` 字段设置为 `True`。
|
||||
|
||||
|
||||
<a name="启动训练"></a>
|
||||
### 启动训练
|
||||
### 2. 启动训练
|
||||
|
||||
PaddleOCR提供了训练脚本、评估脚本和预测脚本,本节将以 CRNN 识别模型为例:
|
||||
|
||||
|
@ -171,7 +183,7 @@ tar -xf rec_mv3_none_bilstm_ctc_v2.0_train.tar && rm -rf rec_mv3_none_bilstm_ctc
|
|||
python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs/rec/rec_icdar15_train.yml
|
||||
```
|
||||
<a name="数据增强"></a>
|
||||
- 数据增强
|
||||
#### 2.1 数据增强
|
||||
|
||||
PaddleOCR提供了多种数据增强方式,如果您希望在训练时加入扰动,请在配置文件中设置 `distort: true`。
|
||||
|
||||
|
@ -182,7 +194,7 @@ PaddleOCR提供了多种数据增强方式,如果您希望在训练时加入
|
|||
*由于OpenCV的兼容性问题,扰动操作暂时只支持Linux*
|
||||
|
||||
<a name="训练"></a>
|
||||
- 训练
|
||||
#### 2.2 训练
|
||||
|
||||
PaddleOCR支持训练和评估交替进行, 可以在 `configs/rec/rec_icdar15_train.yml` 中修改 `eval_batch_step` 设置评估频率,默认每500个iter评估一次。评估过程中默认将最佳acc模型,保存为 `output/rec_CRNN/best_accuracy` 。
|
||||
|
||||
|
@ -195,13 +207,14 @@ PaddleOCR支持训练和评估交替进行, 可以在 `configs/rec/rec_icdar15_t
|
|||
| :--------: | :-------: | :-------: | :-------: | :-----: | :-----: |
|
||||
| [rec_chinese_lite_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml) | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc |
|
||||
| [rec_chinese_common_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_common_train_v2.0.yml) | CRNN | ResNet34_vd | None | BiLSTM | ctc |
|
||||
| rec_chinese_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc |
|
||||
| rec_chinese_common_train.yml | CRNN | ResNet34_vd | None | BiLSTM | ctc |
|
||||
| rec_icdar15_train.yml | CRNN | Mobilenet_v3 large 0.5 | None | BiLSTM | ctc |
|
||||
| rec_mv3_none_bilstm_ctc.yml | CRNN | Mobilenet_v3 large 0.5 | None | BiLSTM | ctc |
|
||||
| rec_mv3_none_none_ctc.yml | Rosetta | Mobilenet_v3 large 0.5 | None | None | ctc |
|
||||
| rec_r34_vd_none_bilstm_ctc.yml | CRNN | Resnet34_vd | None | BiLSTM | ctc |
|
||||
| rec_r34_vd_none_none_ctc.yml | Rosetta | Resnet34_vd | None | None | ctc |
|
||||
| rec_mv3_tps_bilstm_att.yml | CRNN | Mobilenet_v3 | TPS | BiLSTM | att |
|
||||
| rec_r34_vd_tps_bilstm_att.yml | CRNN | Resnet34_vd | TPS | BiLSTM | att |
|
||||
| rec_r50fpn_vd_none_srn.yml | SRN | Resnet50_fpn_vd | None | rnn | srn |
|
||||
|
||||
训练中文数据,推荐使用[rec_chinese_lite_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml),如您希望尝试其他算法在中文数据集上的效果,请参考下列说明修改配置文件:
|
||||
|
||||
|
@ -270,18 +283,111 @@ Eval:
|
|||
**注意,预测/评估时的配置文件请务必与训练一致。**
|
||||
|
||||
<a name="小语种"></a>
|
||||
- 小语种
|
||||
#### 2.3 小语种
|
||||
|
||||
PaddleOCR也提供了多语言的, `configs/rec/multi_languages` 路径下的提供了多语言的配置文件,目前PaddleOCR支持的多语言算法有:
|
||||
PaddleOCR目前已支持26种(除中文外)语种识别,`configs/rec/multi_language` 路径下提供了一个多语言的配置文件模版: [rec_multi_language_lite_train.yml](../../configs/rec/multi_language/rec_multi_language_lite_train.yml)。
|
||||
|
||||
| 配置文件 | 算法名称 | backbone | trans | seq | pred | language |
|
||||
| :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: |
|
||||
| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 英语 |
|
||||
| rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 法语 |
|
||||
| rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 德语 |
|
||||
| rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 日语 |
|
||||
| rec_korean_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 韩语 |
|
||||
您有两种方式创建所需的配置文件:
|
||||
|
||||
1. 通过脚本自动生成
|
||||
|
||||
[generate_multi_language_configs.py](../../configs/rec/multi_language/generate_multi_language_configs.py) 可以帮助您生成多语言模型的配置文件
|
||||
|
||||
- 以意大利语为例,如果您的数据是按如下格式准备的:
|
||||
```
|
||||
|-train_data
|
||||
|- it_train.txt # 训练集标签
|
||||
|- it_val.txt # 验证集标签
|
||||
|- data
|
||||
|- word_001.jpg
|
||||
|- word_002.jpg
|
||||
|- word_003.jpg
|
||||
| ...
|
||||
```
|
||||
|
||||
可以使用默认参数,生成配置文件:
|
||||
|
||||
```bash
|
||||
# 该代码需要在指定目录运行
|
||||
cd PaddleOCR/configs/rec/multi_language/
|
||||
# 通过-l或者--language参数设置需要生成的语种的配置文件,该命令会将默认参数写入配置文件
|
||||
python3 generate_multi_language_configs.py -l it
|
||||
```
|
||||
|
||||
- 如果您的数据放置在其他位置,或希望使用自己的字典,可以通过指定相关参数来生成配置文件:
|
||||
|
||||
```bash
|
||||
# -l或者--language字段是必须的
|
||||
# --train修改训练集,--val修改验证集,--data_dir修改数据集目录,--dict修改字典路径, -o修改对应默认参数
|
||||
cd PaddleOCR/configs/rec/multi_language/
|
||||
python3 generate_multi_language_configs.py -l it \ # 语种
|
||||
--train {path/of/train_label.txt} \ # 训练标签文件的路径
|
||||
--val {path/of/val_label.txt} \ # 验证集标签文件的路径
|
||||
--data_dir {train_data/path} \ # 训练数据的根目录
|
||||
--dict {path/of/dict} \ # 字典文件路径
|
||||
-o Global.use_gpu=False # 是否使用gpu
|
||||
...
|
||||
|
||||
```
|
||||
|
||||
2. 手动修改配置文件
|
||||
|
||||
您也可以手动修改模版中的以下几个字段:
|
||||
|
||||
```
|
||||
Global:
|
||||
use_gpu: True
|
||||
epoch_num: 500
|
||||
...
|
||||
character_type: it # 需要识别的语种
|
||||
character_dict_path: {path/of/dict} # 字典文件所在路径
|
||||
|
||||
Train:
|
||||
dataset:
|
||||
name: SimpleDataSet
|
||||
data_dir: train_data/ # 数据存放根目录
|
||||
label_file_list: ["./train_data/train_list.txt"] # 训练集label路径
|
||||
...
|
||||
|
||||
Eval:
|
||||
dataset:
|
||||
name: SimpleDataSet
|
||||
data_dir: train_data/ # 数据存放根目录
|
||||
label_file_list: ["./train_data/val_list.txt"] # 验证集label路径
|
||||
...
|
||||
|
||||
```
|
||||
|
||||
目前PaddleOCR支持的多语言算法有:
|
||||
|
||||
| 配置文件 | 算法名称 | backbone | trans | seq | pred | language | character_type |
|
||||
| :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: | :-----: |
|
||||
| rec_chinese_cht_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 中文繁体 | chinese_cht|
|
||||
| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 英语(区分大小写) | EN |
|
||||
| rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 法语 | french |
|
||||
| rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 德语 | german |
|
||||
| rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 日语 | japan |
|
||||
| rec_korean_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 韩语 | korean |
|
||||
| rec_it_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 意大利语 | it |
|
||||
| rec_xi_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 西班牙语 | xi |
|
||||
| rec_pu_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 葡萄牙语 | pu |
|
||||
| rec_ru_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 俄罗斯语 | ru |
|
||||
| rec_ar_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 阿拉伯语 | ar |
|
||||
| rec_hi_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 印地语 | hi |
|
||||
| rec_ug_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 维吾尔语 | ug |
|
||||
| rec_fa_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 波斯语 | fa |
|
||||
| rec_ur_lite_train.yml |  CRNN |   Mobilenet_v3 small 0.5 |  None   |  BiLSTM |  ctc  | 乌尔都语 | ur |
|
||||
| rec_rs_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 塞尔维亚(latin)语 | rs |
|
||||
| rec_oc_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 欧西坦语 | oc |
|
||||
| rec_mr_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 马拉地语 | mr |
|
||||
| rec_ne_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 尼泊尔语 | ne |
|
||||
| rec_rsc_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 塞尔维亚(cyrillic)语 | rsc |
|
||||
| rec_bg_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 保加利亚语 | bg |
|
||||
| rec_uk_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 乌克兰语 | uk |
|
||||
| rec_be_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 白俄罗斯语 | be |
|
||||
| rec_te_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 泰卢固语 | te |
|
||||
| rec_ka_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 卡纳达语 | ka |
|
||||
| rec_ta_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 泰米尔语 | ta |
|
||||
|
||||
多语言模型训练方式与中文模型一致,训练数据集均为100w的合成数据,少量的字体可以在 [百度网盘](https://pan.baidu.com/s/1bS_u207Rm7YbY33wOECKDA) 上下载,提取码:frgi。
|
||||
|
||||
|
@ -320,7 +426,7 @@ Eval:
|
|||
...
|
||||
```
|
||||
<a name="评估"></a>
|
||||
### 评估
|
||||
### 3 评估
|
||||
|
||||
评估数据集可以通过 `configs/rec/rec_icdar15_train.yml`  修改Eval中的 `label_file_list` 设置。
|
||||
|
||||
|
@ -330,10 +436,10 @@ python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec
|
|||
```
|
||||
|
||||
<a name="预测"></a>
|
||||
### 预测
|
||||
### 4 预测
|
||||
|
||||
<a name="训练引擎预测"></a>
|
||||
* 训练引擎的预测
|
||||
#### 4.1 训练引擎的预测
|
||||
|
||||
使用 PaddleOCR 训练好的模型,可以通过以下脚本进行快速预测。
|
||||
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
# paddleocr package使用说明
|
||||
|
||||
## 快速上手
|
||||
## 1 快速上手
|
||||
|
||||
### 安装whl包
|
||||
### 1.1 安装whl包
|
||||
|
||||
pip安装
|
||||
```bash
|
||||
|
@ -14,9 +14,12 @@ pip install "paddleocr>=2.0.1" # 推荐使用2.0.1+版本
|
|||
python3 setup.py bdist_wheel
|
||||
pip3 install dist/paddleocr-x.x.x-py3-none-any.whl # x.x.x是paddleocr的版本号
|
||||
```
|
||||
### 1. 代码使用
|
||||
|
||||
* 检测+分类+识别全流程
|
||||
## 2 使用
|
||||
### 2.1 代码使用
|
||||
paddleocr whl包会自动下载ppocr轻量级模型作为默认模型,可以根据第3节**自定义模型**进行自定义更换。
|
||||
|
||||
* 检测+方向分类器+识别全流程
|
||||
```python
|
||||
from paddleocr import PaddleOCR, draw_ocr
|
||||
# Paddleocr目前支持中英文、英文、法语、德语、韩语、日语,可以通过修改lang参数进行切换
|
||||
|
@ -84,7 +87,7 @@ im_show.save('result.jpg')
|
|||
</div>
|
||||
|
||||
|
||||
* 分类+识别
|
||||
* 方向分类器+识别
|
||||
```python
|
||||
from paddleocr import PaddleOCR
|
||||
ocr = PaddleOCR(use_angle_cls=True) # need to run only once to download and load model into memory
|
||||
|
@ -143,7 +146,7 @@ for line in result:
|
|||
['韩国小馆', 0.9907421]
|
||||
```
|
||||
|
||||
* 单独执行分类
|
||||
* 单独执行方向分类器
|
||||
```python
|
||||
from paddleocr import PaddleOCR
|
||||
ocr = PaddleOCR(use_angle_cls=True) # need to run only once to download and load model into memory
|
||||
|
@ -157,14 +160,14 @@ for line in result:
|
|||
['0', 0.9999924]
|
||||
```
|
||||
|
||||
### 通过命令行使用
|
||||
### 2.2 通过命令行使用
|
||||
|
||||
查看帮助信息
|
||||
```bash
|
||||
paddleocr -h
|
||||
```
|
||||
|
||||
* 检测+分类+识别全流程
|
||||
* 检测+方向分类器+识别全流程
|
||||
```bash
|
||||
paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --use_angle_cls true
|
||||
```
|
||||
|
@ -188,7 +191,7 @@ paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg
|
|||
......
|
||||
```
|
||||
|
||||
* 分类+识别
|
||||
* 方向分类器+识别
|
||||
```bash
|
||||
paddleocr --image_dir PaddleOCR/doc/imgs_words/ch/word_1.jpg --use_angle_cls true --det false
|
||||
```
|
||||
|
@ -220,7 +223,7 @@ paddleocr --image_dir PaddleOCR/doc/imgs_words/ch/word_1.jpg --det false
|
|||
['韩国小馆', 0.9907421]
|
||||
```
|
||||
|
||||
* 单独执行分类
|
||||
* 单独执行方向分类器
|
||||
```bash
|
||||
paddleocr --image_dir PaddleOCR/doc/imgs_words/ch/word_1.jpg --use_angle_cls true --det false --rec false
|
||||
```
|
||||
|
@ -230,11 +233,11 @@ paddleocr --image_dir PaddleOCR/doc/imgs_words/ch/word_1.jpg --use_angle_cls tru
|
|||
['0', 0.9999924]
|
||||
```
|
||||
|
||||
## 自定义模型
|
||||
## 3 自定义模型
|
||||
当内置模型无法满足需求时,需要使用到自己训练的模型。
|
||||
首先,参照[inference.md](./inference.md) 第一节将检测、分类和识别模型转换为inference模型,然后按照如下方式使用
|
||||
|
||||
### 代码使用
|
||||
### 3.1 代码使用
|
||||
```python
|
||||
from paddleocr import PaddleOCR, draw_ocr
|
||||
# 模型路径下必须含有model和params文件
|
||||
|
@ -255,17 +258,17 @@ im_show = Image.fromarray(im_show)
|
|||
im_show.save('result.jpg')
|
||||
```
|
||||
|
||||
### 通过命令行使用
|
||||
### 3.2 通过命令行使用
|
||||
|
||||
```bash
|
||||
paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --det_model_dir {your_det_model_dir} --rec_model_dir {your_rec_model_dir} --rec_char_dict_path {your_rec_char_dict_path} --cls_model_dir {your_cls_model_dir} --use_angle_cls true
|
||||
```
|
||||
|
||||
### 使用网络图片或者numpy数组作为输入
|
||||
## 4 使用网络图片或者numpy数组作为输入
|
||||
|
||||
1. 网络图片
|
||||
### 4.1 网络图片
|
||||
|
||||
代码使用
|
||||
- 代码使用
|
||||
```python
|
||||
from paddleocr import PaddleOCR, draw_ocr
|
||||
# Paddleocr目前支持中英文、英文、法语、德语、韩语、日语,可以通过修改lang参数进行切换
|
||||
|
@ -286,12 +289,12 @@ im_show = draw_ocr(image, boxes, txts, scores, font_path='/path/to/PaddleOCR/doc
|
|||
im_show = Image.fromarray(im_show)
|
||||
im_show.save('result.jpg')
|
||||
```
|
||||
命令行模式
|
||||
- 命令行模式
|
||||
```bash
|
||||
paddleocr --image_dir http://n.sinaimg.cn/ent/transform/w630h933/20171222/o111-fypvuqf1838418.jpg --use_angle_cls=true
|
||||
```
|
||||
|
||||
2. numpy数组
|
||||
### 4.2 numpy数组
|
||||
仅通过代码使用时支持numpy数组作为输入
|
||||
```python
|
||||
from paddleocr import PaddleOCR, draw_ocr
|
||||
|
@ -301,7 +304,7 @@ ocr = PaddleOCR(use_angle_cls=True, lang="ch") # need to run only once to downlo
|
|||
img_path = 'PaddleOCR/doc/imgs/11.jpg'
|
||||
img = cv2.imread(img_path)
|
||||
# img = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY), 如果你自己训练的模型支持灰度图,可以将这句话的注释取消
|
||||
result = ocr.ocr(img_path, cls=True)
|
||||
result = ocr.ocr(img, cls=True)
|
||||
for line in result:
|
||||
print(line)
|
||||
|
||||
|
@ -316,7 +319,7 @@ im_show = Image.fromarray(im_show)
|
|||
im_show.save('result.jpg')
|
||||
```
|
||||
|
||||
## 参数说明
|
||||
## 5 参数说明
|
||||
|
||||
| 字段 | 说明 | 默认值 |
|
||||
|-------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------|
|
||||
|
|
|
@ -42,8 +42,8 @@ PaddleOCR open-source text recognition algorithms list:
|
|||
- [x] CRNN([paper](https://arxiv.org/abs/1507.05717))[7]
|
||||
- [x] Rosetta([paper](https://arxiv.org/abs/1910.05085))[10]
|
||||
- [x] STAR-Net([paper](http://www.bmva.org/bmvc/2016/papers/paper043/index.html))[11]
|
||||
- [ ] RARE([paper](https://arxiv.org/abs/1603.03915v1))[12] coming soon
|
||||
- [ ] SRN([paper](https://arxiv.org/abs/2003.12294))[5] coming soon
|
||||
- [x] RARE([paper](https://arxiv.org/abs/1603.03915v1))[12]
|
||||
- [x] SRN([paper](https://arxiv.org/abs/2003.12294))[5]
|
||||
|
||||
Following [DTRB](https://arxiv.org/abs/1904.01906), the text recognition algorithms above were trained on MJSynth and SynthText and evaluated on IIIT, SVT, IC03, IC13, IC15, SVTP and CUTE. The results are as follows:
|
||||
|
||||
|
@ -55,5 +55,8 @@ Refer to [DTRB](https://arxiv.org/abs/1904.01906), the training and evaluation r
|
|||
|CRNN|MobileNetV3|79.97%|rec_mv3_none_bilstm_ctc|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_none_bilstm_ctc_v2.0_train.tar)|
|
||||
|StarNet|Resnet34_vd|84.44%|rec_r34_vd_tps_bilstm_ctc|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_tps_bilstm_ctc_v2.0_train.tar)|
|
||||
|StarNet|MobileNetV3|81.42%|rec_mv3_tps_bilstm_ctc|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_tps_bilstm_ctc_v2.0_train.tar)|
|
||||
|RARE|MobileNetV3|82.5%|rec_mv3_tps_bilstm_att |[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_tps_bilstm_att_v2.0_train.tar)|
|
||||
|RARE|Resnet34_vd|83.6%|rec_r34_vd_tps_bilstm_att |[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_tps_bilstm_att_v2.0_train.tar)|
|
||||
|SRN|Resnet50_vd_fpn| 88.52% | rec_r50fpn_vd_none_srn |[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r50_vd_srn_train.tar)|
|
||||
|
||||
Please refer to the document for training guide and use of PaddleOCR text recognition algorithms [Text recognition model training/evaluation/prediction](./recognition_en.md)
|
||||
|
|
|
@ -1,5 +1,12 @@
|
|||
## TEXT ANGLE CLASSIFICATION
|
||||
|
||||
### Method introduction
|
||||
The angle classification is used in the scene where the image is not 0 degrees. In this scene, it is necessary to perform a correction operation on the text line detected in the picture. In the PaddleOCR system,
|
||||
the text line image obtained after text detection is sent to the recognition model after affine transformation. At this time, only a 0 and 180 degree angle classification of the text is required, so the built-in PaddleOCR text angle classifier **only supports 0 and 180 degree classification**. If you want to support more angles, you can modify the algorithm yourself.
|
||||
|
||||
Example of 0 and 180 degree data samples:
|
||||
|
||||
![](../imgs_results/angle_class_example.jpg)
|
||||
### DATA PREPARATION
|
||||
|
||||
Please organize the dataset as follows:
|
||||
|
@ -66,7 +73,7 @@ Start training:
|
|||
```
|
||||
# Set PYTHONPATH path
|
||||
export PYTHONPATH=$PYTHONPATH:.
|
||||
# GPU training Support single card and multi-card training, specify the card number through --gpus. If your paddle version is less than 2.0rc1, please use '--selected_gpus'
|
||||
# GPU training Support single card and multi-card training, specify the card number through --gpus.
|
||||
# Start training, the following command has been written into the train.sh file, just modify the configuration file path in the file
|
||||
python3 -m paddle.distributed.launch --gpus '0,1,2,3,4,5,6,7' tools/train.py -c configs/cls/cls_mv3.yml
|
||||
```
|
||||
|
|
|
@ -76,7 +76,7 @@ You can also use `-o` to change the training parameters without modifying the ym
|
|||
python3 tools/train.py -c configs/det/det_mv3_db.yml -o Optimizer.base_lr=0.0001
|
||||
|
||||
# multi-GPU training
|
||||
# Set the GPU ID used by the '--gpus' parameter; If your paddle version is less than 2.0rc1, please use '--selected_gpus'
|
||||
# Set the GPU ID used by the '--gpus' parameter.
|
||||
python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs/det/det_mv3_db.yml -o Optimizer.base_lr=0.0001
|
||||
|
||||
|
||||
|
|
|
@ -5,7 +5,8 @@ The inference model (the model saved by `paddle.jit.save`) is generally a solidi
|
|||
|
||||
The model saved during the training process is the checkpoints model, which saves the parameters of the model and is mostly used to resume training.
|
||||
|
||||
Compared with the checkpoints model, the inference model will additionally save the structural information of the model. It has superior performance in predicting in deployment and accelerating inferencing, is flexible and convenient, and is suitable for integration with actual systems. For more details, please refer to the document [Classification Framework](https://github.com/PaddlePaddle/PaddleClas/blob/master/docs/zh_CN/extension/paddle_inference.md).
|
||||
Compared with the checkpoints model, the inference model will additionally save the structural information of the model. Therefore, it is easier to deploy because the model structure and model parameters are already solidified in the inference model file, and is suitable for integration with actual systems.
|
||||
For more details, please refer to the document [Classification Framework](https://github.com/PaddlePaddle/PaddleClas/blob/release%2F2.0/docs/zh_CN/extension/paddle_mobile_inference.md).
|
||||
|
||||
Next, we first introduce how to convert a trained model into an inference model, and then we will introduce text detection, text recognition, angle class, and the concatenation of them based on inference model.
|
||||
|
||||
|
@ -25,6 +26,7 @@ Next, we first introduce how to convert a trained model into an inference model,
|
|||
- [TEXT RECOGNITION MODEL INFERENCE](#RECOGNITION_MODEL_INFERENCE)
|
||||
- [1. LIGHTWEIGHT CHINESE MODEL](#LIGHTWEIGHT_RECOGNITION)
|
||||
- [2. CTC-BASED TEXT RECOGNITION MODEL INFERENCE](#CTC-BASED_RECOGNITION)
|
||||
- [3. SRN-BASED TEXT RECOGNITION MODEL INFERENCE](#SRN-BASED_RECOGNITION)
|
||||
- [4. TEXT RECOGNITION MODEL INFERENCE USING CUSTOM CHARACTERS DICTIONARY](#USING_CUSTOM_CHARACTERS)
|
||||
- [5. MULTILINGUAL MODEL INFERENCE](#MULTILINGUAL_MODEL_INFERENCE)
|
||||
|
||||
|
@ -146,7 +148,7 @@ The visual text detection results are saved to the ./inference_results folder by
|
|||
![](../imgs_results/det_res_00018069.jpg)
|
||||
|
||||
You can use the parameters `limit_type` and `det_limit_side_len` to limit the size of the input image,
|
||||
The optional parameters of `litmit_type` are [`max`, `min`], and
|
||||
The optional parameters of `limit_type` are [`max`, `min`], and
|
||||
`det_limit_side_len` is a positive integer, generally set to a multiple of 32, such as 960.
|
||||
|
||||
The default setting of the parameters is `limit_type='max', det_limit_side_len=960`. Indicates that the longest side of the network input image cannot exceed 960,
|
||||
|
@ -304,8 +306,23 @@ self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
|
|||
dict_character = list(self.character_str)
|
||||
```
|
||||
|
||||
<a name="SRN-BASED_RECOGNITION"></a>
|
||||
### 3. SRN-BASED TEXT RECOGNITION MODEL INFERENCE
|
||||
|
||||
The recognition model based on SRN requires additional setting of the recognition algorithm parameter
|
||||
--rec_algorithm="SRN". At the same time, it is necessary to ensure that the predicted shape is consistent
|
||||
with the training, such as: --rec_image_shape="1, 64, 256"
|
||||
|
||||
```
|
||||
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" \
|
||||
--rec_model_dir="./inference/srn/" \
|
||||
--rec_image_shape="1, 64, 256" \
|
||||
--rec_char_type="en" \
|
||||
--rec_algorithm="SRN"
|
||||
```
|
||||
|
||||
<a name="USING_CUSTOM_CHARACTERS"></a>
|
||||
### 3. TEXT RECOGNITION MODEL INFERENCE USING CUSTOM CHARACTERS DICTIONARY
|
||||
### 4. TEXT RECOGNITION MODEL INFERENCE USING CUSTOM CHARACTERS DICTIONARY
|
||||
If the text dictionary is modified during training, when using the inference model to predict, you need to specify the dictionary path used by `--rec_char_dict_path`, and set `rec_char_type=ch`
|
||||
|
||||
```
|
||||
|
@ -313,12 +330,12 @@ python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png
|
|||
```
|
||||
|
||||
<a name="MULTILINGUAL_MODEL_INFERENCE"></a>
|
||||
### 4. MULTILINGAUL MODEL INFERENCE
|
||||
### 5. MULTILINGUAL MODEL INFERENCE
|
||||
If you need to predict other language models, when using inference model prediction, you need to specify the dictionary path used by `--rec_char_dict_path`. At the same time, in order to get the correct visualization results,
|
||||
You need to specify the visual font path through `--vis_font_path`. There are small language fonts provided by default under the `doc/` path, such as Korean recognition:
|
||||
You need to specify the visual font path through `--vis_font_path`. There are small language fonts provided by default under the `doc/fonts` path, such as Korean recognition:
|
||||
|
||||
```
|
||||
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/korean/1.jpg" --rec_model_dir="./your inference model" --rec_char_type="korean" --rec_char_dict_path="ppocr/utils/dict/korean_dict.txt" --vis_font_path="doc/korean.ttf"
|
||||
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/korean/1.jpg" --rec_model_dir="./your inference model" --rec_char_type="korean" --rec_char_dict_path="ppocr/utils/dict/korean_dict.txt" --vis_font_path="doc/fonts/korean.ttf"
|
||||
```
|
||||
![](../imgs_words/korean/1.jpg)
|
||||
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
After testing, paddleocr can run on glibc 2.23. You can also test other glibc versions or install glibc 2.23 for the best compatibility.
|
||||
|
||||
PaddleOCR working environment:
|
||||
- PaddlePaddle 1.8+, Recommend PaddlePaddle 2.0rc1
|
||||
- PaddlePaddle 2.0.0
|
||||
- python3.7
|
||||
- glibc 2.23
|
||||
|
||||
|
@ -33,15 +33,15 @@ You can also visit [DockerHub](https://hub.docker.com/r/paddlepaddle/paddle/tags
|
|||
sudo docker container exec -it ppocr /bin/bash
|
||||
```
|
||||
|
||||
**2. Install PaddlePaddle Fluid v2.0**
|
||||
**2. Install PaddlePaddle 2.0**
|
||||
```
|
||||
pip3 install --upgrade pip
|
||||
|
||||
# If you have cuda9 or cuda10 installed on your machine, please run the following command to install
|
||||
python3 -m pip install paddlepaddle-gpu==2.0rc1 -i https://mirror.baidu.com/pypi/simple
|
||||
python3 -m pip install paddlepaddle-gpu==2.0.0 -i https://mirror.baidu.com/pypi/simple
|
||||
|
||||
# If you only have cpu on your machine, please run the following command to install
|
||||
python3 -m pip install paddlepaddle==2.0rc1 -i https://mirror.baidu.com/pypi/simple
|
||||
python3 -m pip install paddlepaddle==2.0.0 -i https://mirror.baidu.com/pypi/simple
|
||||
```
|
||||
For more software version requirements, please refer to the instructions in [Installation Document](https://www.paddlepaddle.org.cn/install/quick) for operation.
|
||||
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
## OCR model list(V2.0, updated on 2021.1.20)
|
||||
**Note** : Compared with [models 1.1](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_en/models_list_en.md), which are trained with static graph programming paradigm, models 2.0 are the dynamic graph trained version and achieve close performance.
|
||||
> **Note**
|
||||
> 1. Compared with [models 1.1](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_en/models_list_en.md), which are trained with static graph programming paradigm, models 2.0 are the dynamic graph trained version and achieve close performance.
|
||||
> 2. All models in this tutorial are ppocr-series models. For an introduction to algorithms and models based on public datasets, refer to the [algorithm overview tutorial](./algorithm_overview_en.md).
|
||||
|
||||
- [1. Text Detection Model](#Detection)
|
||||
- [2. Text Recognition Model](#Recognition)
|
||||
|
@ -12,9 +14,13 @@ The downloadable models provided by PaddleOCR include `inference model`, `traine
|
|||
|
||||
|model type|model format|description|
|
||||
|--- | --- | --- |
|
||||
|inference model|inference.pdmodel、inference.pdiparams|Used for reasoning based on Python prediction engine,[detail](./inference_en.md)|
|
||||
|inference model|inference.pdmodel、inference.pdiparams|Used for inference based on Paddle inference engine,[detail](./inference_en.md)|
|
||||
|trained model, pre-trained model|\*.pdparams、\*.pdopt、\*.states |The checkpoints model saved in the training process, which stores the parameters of the model, mostly used for model evaluation and continuous training.|
|
||||
|slim model|\*.nb|Generally used for Lite deployment|
|
||||
|slim model|\*.nb| Model compressed by PaddleSlim (a model compression tool using PaddlePaddle), which is suitable for mobile-side deployment scenarios (Paddle-Lite is needed for slim model deployment). |
|
||||
|
||||
Relationship of the above models is as follows.
|
||||
|
||||
![](../imgs_en/model_prod_flow_en.png)
|
||||
|
||||
<a name="Detection"></a>
|
||||
### 1. Text Detection Model
|
||||
|
@ -80,7 +86,7 @@ If you want to train your own model, you can prepare the training set file, veri
|
|||
cd {your/path/}PaddleOCR/configs/rec/multi_language/
|
||||
# The -l or --language parameter is required
|
||||
# --train modify train_list path
|
||||
# --val modify eval_list path
|
||||
# --val modify eval_list path
|
||||
# --data_dir modify data dir
|
||||
# -o modify default parameters
|
||||
# --dict Change the dictionary path. The example uses the default dictionary path, so that this parameter can be empty.
|
||||
|
@ -93,7 +99,7 @@ python3 generate_multi_language_configs.py -l it \
|
|||
|model name|description|config|model size|download|
|
||||
| --- | --- | --- | --- | --- |
|
||||
| french_mobile_v2.0_rec |Lightweight model for French recognition|[rec_french_lite_train.yml](../../configs/rec/multi_language/rec_french_lite_train.yml)|2.65M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/french_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/french_mobile_v2.0_rec_train.tar) |
|
||||
| german_mobile_v2.0_rec |Lightweight model for French recognition|[rec_german_lite_train.yml](../../configs/rec/multi_language/rec_german_lite_train.yml)|2.65M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/german_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/german_mobile_v2.0_rec_train.tar) |
|
||||
| german_mobile_v2.0_rec |Lightweight model for German recognition|[rec_german_lite_train.yml](../../configs/rec/multi_language/rec_german_lite_train.yml)|2.65M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/german_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/german_mobile_v2.0_rec_train.tar) |
|
||||
| korean_mobile_v2.0_rec |Lightweight model for Korean recognition|[rec_korean_lite_train.yml](../../configs/rec/multi_language/rec_korean_lite_train.yml)|3.9M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/korean_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/korean_mobile_v2.0_rec_train.tar) |
|
||||
| japan_mobile_v2.0_rec |Lightweight model for Japanese recognition|[rec_japan_lite_train.yml](../../configs/rec/multi_language/rec_japan_lite_train.yml)|4.23M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/japan_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/japan_mobile_v2.0_rec_train.tar) |
|
||||
| it_mobile_v2.0_rec |Lightweight model for Italian recognition|rec_it_lite_train.yml|2.53M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/it_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/it_mobile_v2.0_rec_train.tar) |
|
||||
|
|
|
@ -1,59 +1,95 @@
|
|||
## TEXT RECOGNITION
|
||||
|
||||
- [DATA PREPARATION](#DATA_PREPARATION)
|
||||
- [Dataset Download](#Dataset_download)
|
||||
- [Costom Dataset](#Costom_Dataset)
|
||||
- [Dictionary](#Dictionary)
|
||||
- [Add Space Category](#Add_space_category)
|
||||
- [1 DATA PREPARATION](#DATA_PREPARATION)
|
||||
- [1.1 Custom Dataset](#Costom_Dataset)
|
||||
- [1.2 Dataset Download](#Dataset_download)
|
||||
- [1.3 Dictionary](#Dictionary)
|
||||
- [1.4 Add Space Category](#Add_space_category)
|
||||
|
||||
- [TRAINING](#TRAINING)
|
||||
- [Data Augmentation](#Data_Augmentation)
|
||||
- [Training](#Training)
|
||||
- [Multi-language](#Multi_language)
|
||||
- [2 TRAINING](#TRAINING)
|
||||
- [2.1 Data Augmentation](#Data_Augmentation)
|
||||
- [2.2 Training](#Training)
|
||||
- [2.3 Multi-language](#Multi_language)
|
||||
|
||||
- [EVALUATION](#EVALUATION)
|
||||
- [3 EVALUATION](#EVALUATION)
|
||||
|
||||
- [PREDICTION](#PREDICTION)
|
||||
- [Training engine prediction](#Training_engine_prediction)
|
||||
- [4 PREDICTION](#PREDICTION)
|
||||
- [4.1 Training engine prediction](#Training_engine_prediction)
|
||||
|
||||
<a name="DATA_PREPARATION"></a>
|
||||
### DATA PREPARATION
|
||||
|
||||
|
||||
PaddleOCR supports two data formats: `LMDB` is used to train public data and evaluation algorithms; `general data` is used to train your own data:
|
||||
PaddleOCR supports two data formats:
|
||||
- `LMDB` is used to train data sets stored in lmdb format;
|
||||
- `general data` is used to train data sets stored in text files:
|
||||
|
||||
Please organize the dataset as follows:
|
||||
|
||||
The default storage path for training data is `PaddleOCR/train_data`, if you already have a dataset on your disk, just create a soft link to the dataset directory:
|
||||
|
||||
```
|
||||
# linux and mac os
|
||||
ln -sf <path/to/dataset> <path/to/paddle_ocr>/train_data/dataset
|
||||
# windows
|
||||
mklink /d <path/to/paddle_ocr>/train_data/dataset <path/to/dataset>
|
||||
```
|
||||
|
||||
<a name="Dataset_download"></a>
|
||||
* Dataset download
|
||||
|
||||
If you do not have a dataset locally, you can download it on the official website [icdar2015](http://rrc.cvc.uab.es/?ch=4&com=downloads). Also refer to [DTRB](https://github.com/clovaai/deep-text-recognition-benchmark#download-lmdb-dataset-for-traininig-and-evaluation-from-here),download the lmdb format dataset required for benchmark
|
||||
|
||||
If you want to reproduce the paper indicators of SRN, you need to download offline [augmented data](https://pan.baidu.com/s/1-HSZ-ZVdqBF2HaBZ5pRAKA), extraction code: y3ry. The augmented data is obtained by rotation and perturbation of mjsynth and synthtext. Please unzip the data to {your_path}/PaddleOCR/train_data/data_lmdb_Release/training/path.
|
||||
|
||||
<a name="Costom_Dataset"></a>
|
||||
* Use your own dataset:
|
||||
#### 1.1 Custom dataset
|
||||
|
||||
If you want to use your own data for training, please refer to the following to organize your data.
|
||||
|
||||
- Training set
|
||||
|
||||
First put the training images in the same folder (train_images), and use a txt file (rec_gt_train.txt) to store the image path and label.
|
||||
It is recommended to put the training images in the same folder, and use a txt file (rec_gt_train.txt) to store the image path and label. The contents of the txt file are as follows:
|
||||
|
||||
* Note: by default, the image path and image label are split with \t, if you use other methods to split, it will cause training error
|
||||
|
||||
```
|
||||
" Image file name Image annotation "
|
||||
|
||||
train_data/train_0001.jpg 简单可依赖
|
||||
train_data/train_0002.jpg 用科技让复杂的世界更简单
|
||||
train_data/rec/train/word_001.jpg 简单可依赖
|
||||
train_data/rec/train/word_002.jpg 用科技让复杂的世界更简单
|
||||
...
|
||||
```
|
||||
|
||||
The final training set should have the following file structure:
|
||||
|
||||
```
|
||||
|-train_data
|
||||
|-rec
|
||||
|- rec_gt_train.txt
|
||||
|- train
|
||||
|- word_001.png
|
||||
|- word_002.jpg
|
||||
|- word_003.jpg
|
||||
| ...
|
||||
```
|
||||
|
||||
- Test set
|
||||
|
||||
Similar to the training set, the test set also needs a folder containing all images (test) and a rec_gt_test.txt file. The structure of the test set is as follows:
|
||||
|
||||
```
|
||||
|-train_data
|
||||
|-rec
|
||||
|-ic15_data
|
||||
|- rec_gt_test.txt
|
||||
|- test
|
||||
|- word_001.jpg
|
||||
|- word_002.jpg
|
||||
|- word_003.jpg
|
||||
| ...
|
||||
```
|
||||
|
||||
<a name="Dataset_download"></a>
|
||||
#### 1.2 Dataset download
|
||||
|
||||
If you do not have a dataset locally, you can download it on the official website [icdar2015](http://rrc.cvc.uab.es/?ch=4&com=downloads). Also refer to [DTRB](https://github.com/clovaai/deep-text-recognition-benchmark#download-lmdb-dataset-for-traininig-and-evaluation-from-here) to download the lmdb format dataset required for benchmark.
|
||||
|
||||
If you want to reproduce the paper indicators of SRN, you need to download offline [augmented data](https://pan.baidu.com/s/1-HSZ-ZVdqBF2HaBZ5pRAKA), extraction code: y3ry. The augmented data is obtained by rotation and perturbation of mjsynth and synthtext. Please unzip the data to {your_path}/PaddleOCR/train_data/data_lmdb_Release/training/path.
|
||||
|
||||
PaddleOCR provides label files for training the icdar2015 dataset, which can be downloaded in the following ways:
|
||||
|
||||
```
|
||||
|
@ -63,35 +99,8 @@ wget -P ./train_data/ic15_data https://paddleocr.bj.bcebos.com/dataset/rec_gt_t
|
|||
wget -P ./train_data/ic15_data https://paddleocr.bj.bcebos.com/dataset/rec_gt_test.txt
|
||||
```
|
||||
|
||||
The final training set should have the following file structure:
|
||||
|
||||
```
|
||||
|-train_data
|
||||
|-ic15_data
|
||||
|- rec_gt_train.txt
|
||||
|- train
|
||||
|- word_001.png
|
||||
|- word_002.jpg
|
||||
|- word_003.jpg
|
||||
| ...
|
||||
```
|
||||
|
||||
- Test set
|
||||
|
||||
Similar to the training set, the test set also needs to be provided a folder containing all images (test) and a rec_gt_test.txt. The structure of the test set is as follows:
|
||||
|
||||
```
|
||||
|-train_data
|
||||
|-ic15_data
|
||||
|- rec_gt_test.txt
|
||||
|- test
|
||||
|- word_001.jpg
|
||||
|- word_002.jpg
|
||||
|- word_003.jpg
|
||||
| ...
|
||||
```
|
||||
<a name="Dictionary"></a>
|
||||
- Dictionary
|
||||
#### 1.3 Dictionary
|
||||
|
||||
Finally, a dictionary ({word_dict_name}.txt) needs to be provided so that when the model is trained, all the characters that appear can be mapped to the dictionary index.
|
||||
|
||||
|
@ -108,6 +117,8 @@ n
|
|||
|
||||
In `word_dict.txt`, there is a single word in each line, which maps characters and numeric indexes together, e.g "and" will be mapped to [2 5 1]
|
||||
|
||||
PaddleOCR has built-in dictionaries, which can be used on demand.
|
||||
|
||||
`ppocr/utils/ppocr_keys_v1.txt` is a Chinese dictionary with 6623 characters.
|
||||
|
||||
`ppocr/utils/ic15_dict.txt` is an English dictionary with 63 characters
|
||||
|
@ -123,10 +134,8 @@ In `word_dict.txt`, there is a single word in each line, which maps characters a
|
|||
`ppocr/utils/dict/en_dict.txt` is an English dictionary with 63 characters
|
||||
|
||||
|
||||
You can use it on demand.
|
||||
|
||||
The current multi-language model is still in the demo stage and will continue to optimize the model and add languages. **You are very welcome to provide us with dictionaries and fonts in other languages**,
|
||||
If you like, you can submit the dictionary file to [dict](../../ppocr/utils/dict) or corpus file to [corpus](../../ppocr/utils/corpus) and we will thank you in the Repo.
|
||||
If you like, you can submit the dictionary file to [dict](../../ppocr/utils/dict) and we will thank you in the Repo.
|
||||
|
||||
|
||||
To customize the dict file, please modify the `character_dict_path` field in `configs/rec/rec_icdar15_train.yml` and set `character_type` to `ch`.
|
||||
|
@ -136,14 +145,14 @@ To customize the dict file, please modify the `character_dict_path` field in `co
|
|||
If you need to customize the dict file, please add the `character_dict_path` field in `configs/rec/rec_icdar15_train.yml` to point to your dictionary path, and set `character_type` to `ch`.
|
||||
|
||||
<a name="Add_space_category"></a>
|
||||
- Add space category
|
||||
#### 1.4 Add space category
|
||||
|
||||
If you want to support the recognition of the `space` category, please set the `use_space_char` field in the yml file to `True`.
|
||||
|
||||
**Note: use_space_char only takes effect when character_type=ch**
|
||||
|
||||
<a name="TRAINING"></a>
|
||||
### TRAINING
|
||||
### 2 TRAINING
|
||||
|
||||
PaddleOCR provides training scripts, evaluation scripts, and prediction scripts. In this section, the CRNN recognition model will be used as an example:
|
||||
|
||||
|
@ -166,7 +175,7 @@ Start training:
|
|||
python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs/rec/rec_icdar15_train.yml
|
||||
```
|
||||
<a name="Data_Augmentation"></a>
|
||||
- Data Augmentation
|
||||
#### 2.1 Data Augmentation
|
||||
|
||||
PaddleOCR provides a variety of data augmentation methods. If you want to add disturbance during training, please set `distort: true` in the configuration file.
|
||||
|
||||
|
@ -175,7 +184,7 @@ The default perturbation methods are: cvtColor, blur, jitter, Gasuss noise, rand
|
|||
Each disturbance method is selected with a 50% probability during the training process. For specific code implementation, please refer to: [img_tools.py](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/ppocr/data/rec/img_tools.py)
|
||||
|
||||
<a name="Training"></a>
|
||||
- Training
|
||||
#### 2.2 Training
|
||||
|
||||
PaddleOCR supports alternating training and evaluation. You can modify `eval_batch_step` in `configs/rec/rec_icdar15_train.yml` to set the evaluation frequency. By default, it is evaluated every 500 iter and the best acc model is saved under `output/rec_CRNN/best_accuracy` during the evaluation process.
|
||||
|
||||
|
@ -195,6 +204,10 @@ If the evaluation set is large, the test will be time-consuming. It is recommend
|
|||
| rec_mv3_none_none_ctc.yml | Rosetta | Mobilenet_v3 large 0.5 | None | None | ctc |
|
||||
| rec_r34_vd_none_bilstm_ctc.yml | CRNN | Resnet34_vd | None | BiLSTM | ctc |
|
||||
| rec_r34_vd_none_none_ctc.yml | Rosetta | Resnet34_vd | None | None | ctc |
|
||||
| rec_mv3_tps_bilstm_att.yml | CRNN | Mobilenet_v3 | TPS | BiLSTM | att |
|
||||
| rec_r34_vd_tps_bilstm_att.yml | CRNN | Resnet34_vd | TPS | BiLSTM | att |
|
||||
| rec_r50fpn_vd_none_srn.yml | SRN | Resnet50_fpn_vd | None | rnn | srn |
|
||||
|
||||
|
||||
For training Chinese data, it is recommended to use
|
||||
[rec_chinese_lite_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml). If you want to try the result of other algorithms on the Chinese data set, please refer to the following instructions to modify the configuration file:
|
||||
|
@ -264,17 +277,118 @@ Eval:
|
|||
**Note that the configuration file for prediction/evaluation must be consistent with the training.**
|
||||
|
||||
<a name="Multi_language"></a>
|
||||
- Multi-language
|
||||
#### 2.3 Multi-language
|
||||
|
||||
PaddleOCR also provides multi-language. The configuration file in `configs/rec/multi_languages` provides multi-language configuration files. Currently, the multi-language algorithms supported by PaddleOCR are:
|
||||
PaddleOCR currently supports recognition of 26 languages (not counting Chinese). A multi-language configuration file template is
|
||||
provided under the path `configs/rec/multi_language`: [rec_multi_language_lite_train.yml](../../configs/rec/multi_language/rec_multi_language_lite_train.yml).
|
||||
|
||||
There are two ways to create the required configuration file:
|
||||
|
||||
1. Automatically generated by script
|
||||
|
||||
[generate_multi_language_configs.py](../../configs/rec/multi_language/generate_multi_language_configs.py) can help you generate configuration files for multi-language models
|
||||
|
||||
- Take Italian as an example, if your data is prepared in the following format:
|
||||
```
|
||||
|-train_data
|
||||
|- it_train.txt # train_set label
|
||||
|- it_val.txt # val_set label
|
||||
|- data
|
||||
|- word_001.jpg
|
||||
|- word_002.jpg
|
||||
|- word_003.jpg
|
||||
| ...
|
||||
```
|
||||
|
||||
You can use the default parameters to generate a configuration file:
|
||||
|
||||
```bash
|
||||
# The code needs to be run in the specified directory
|
||||
cd PaddleOCR/configs/rec/multi_language/
|
||||
# Set the configuration file of the language to be generated through the -l or --language parameter.
|
||||
# This command will write the default parameters into the configuration file
|
||||
python3 generate_multi_language_configs.py -l it
|
||||
```
|
||||
|
||||
- If your data is placed in another location, or you want to use your own dictionary, you can generate the configuration file by specifying the relevant parameters:
|
||||
|
||||
```bash
|
||||
# -l or --language field is required
|
||||
# --train to modify the training set
|
||||
# --val to modify the validation set
|
||||
# --data_dir to modify the data set directory
|
||||
# --dict to modify the dict path
|
||||
# -o to modify the corresponding default parameters
|
||||
cd PaddleOCR/configs/rec/multi_language/
|
||||
python3 generate_multi_language_configs.py -l it \ # language
|
||||
--train {path/of/train_label.txt} \ # path of train_label
|
||||
--val {path/of/val_label.txt} \ # path of val_label
|
||||
--data_dir {train_data/path} \ # root directory of training data
|
||||
--dict {path/of/dict} \ # path of dict
|
||||
-o Global.use_gpu=False # whether to use gpu
|
||||
...
|
||||
|
||||
```
|
||||
|
||||
2. Manually modify the configuration file
|
||||
|
||||
You can also manually modify the following fields in the template:
|
||||
|
||||
```
|
||||
Global:
|
||||
use_gpu: True
|
||||
epoch_num: 500
|
||||
...
|
||||
character_type: it # language
|
||||
character_dict_path: {path/of/dict} # path of dict
|
||||
|
||||
Train:
|
||||
dataset:
|
||||
name: SimpleDataSet
|
||||
data_dir: train_data/ # root directory of training data
|
||||
label_file_list: ["./train_data/train_list.txt"] # train label path
|
||||
...
|
||||
|
||||
Eval:
|
||||
dataset:
|
||||
name: SimpleDataSet
|
||||
data_dir: train_data/ # root directory of val data
|
||||
label_file_list: ["./train_data/val_list.txt"] # val label path
|
||||
...
|
||||
|
||||
```
|
||||
|
||||
Currently, the multi-language algorithms supported by PaddleOCR are:
|
||||
|
||||
| Configuration file | Algorithm name | backbone | trans | seq | pred | language | character_type |
|
||||
| :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: | :-----: |
|
||||
| rec_chinese_cht_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | chinese traditional | chinese_cht|
|
||||
| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | English(Case sensitive) | EN |
|
||||
| rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | French | french |
|
||||
| rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | German | german |
|
||||
| rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Japanese | japan |
|
||||
| rec_korean_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Korean | korean |
|
||||
| rec_it_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Italian | it |
|
||||
| rec_xi_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Spanish | xi |
|
||||
| rec_pu_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Portuguese | pu |
|
||||
| rec_ru_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Russian | ru |
|
||||
| rec_ar_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Arabic | ar |
|
||||
| rec_hi_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Hindi | hi |
|
||||
| rec_ug_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Uyghur | ug |
|
||||
| rec_fa_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Persian(Farsi) | fa |
|
||||
| rec_ur_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Urdu | ur |
|
||||
| rec_rs_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Serbian(latin) | rs |
|
||||
| rec_oc_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Occitan | oc |
|
||||
| rec_mr_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Marathi | mr |
|
||||
| rec_ne_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Nepali | ne |
|
||||
| rec_rsc_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Serbian(cyrillic) | rsc |
|
||||
| rec_bg_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Bulgarian | bg |
|
||||
| rec_uk_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Ukrainian | uk |
|
||||
| rec_be_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Belarusian | be |
|
||||
| rec_te_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Telugu | te |
|
||||
| rec_ka_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Kannada | ka |
|
||||
| rec_ta_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Tamil | ta |
|
||||
|
||||
| Configuration file | Algorithm name | backbone | trans | seq | pred | language |
|
||||
| :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: |
|
||||
| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | English |
|
||||
| rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | French |
|
||||
| rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | German |
|
||||
| rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Japanese |
|
||||
| rec_korean_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Korean |
|
||||
|
||||
The multi-language model training method is the same as the Chinese model. The training data set consists of 1 million synthetic samples. A small amount of fonts and test data can be downloaded on [Baidu Netdisk](https://pan.baidu.com/s/1bS_u207Rm7YbY33wOECKDA), extraction code: frgi.
|
||||
|
||||
|
@ -315,7 +429,7 @@ Eval:
|
|||
```
|
||||
|
||||
<a name="EVALUATION"></a>
|
||||
### EVALUATION
|
||||
### 3 EVALUATION
|
||||
|
||||
The evaluation dataset can be set by modifying the `Eval.dataset.label_file_list` field in the `configs/rec/rec_icdar15_train.yml` file.
|
||||
|
||||
|
@ -325,10 +439,10 @@ python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec
|
|||
```
|
||||
|
||||
<a name="PREDICTION"></a>
|
||||
### PREDICTION
|
||||
### 4 PREDICTION
|
||||
|
||||
<a name="Training_engine_prediction"></a>
|
||||
* Training engine prediction
|
||||
#### 4.1 Training engine prediction
|
||||
|
||||
Using a model trained with PaddleOCR, you can quickly get predictions with the following script.
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
# paddleocr package
|
||||
|
||||
## Get started quickly
|
||||
### install package
|
||||
## 1 Get started quickly
|
||||
### 1.1 install package
|
||||
install by pypi
|
||||
```bash
|
||||
pip install "paddleocr>=2.0.1" # Recommend to use version 2.0.1+
|
||||
|
@ -12,9 +12,11 @@ build own whl package and install
|
|||
python3 setup.py bdist_wheel
|
||||
pip3 install dist/paddleocr-x.x.x-py3-none-any.whl # x.x.x is the version of paddleocr
|
||||
```
|
||||
### 1. Use by code
|
||||
## 2 Use
|
||||
### 2.1 Use by code
|
||||
The paddleocr whl package will automatically download the ppocr lightweight model as the default model, which can be customized and replaced according to the section 3 **Custom Model**.
|
||||
|
||||
* detection classification and recognition
|
||||
* detection angle classification and recognition
|
||||
```python
|
||||
from paddleocr import PaddleOCR,draw_ocr
|
||||
# Paddleocr supports Chinese, English, French, German, Korean and Japanese.
|
||||
|
@ -163,7 +165,7 @@ Output will be a list, each item contains classification result and confidence
|
|||
['0', 0.99999964]
|
||||
```
|
||||
|
||||
### Use by command line
|
||||
### 2.2 Use by command line
|
||||
|
||||
show help information
|
||||
```bash
|
||||
|
@ -239,11 +241,11 @@ Output will be a list, each item contains classification result and confidence
|
|||
['0', 0.99999964]
|
||||
```
|
||||
|
||||
## Use custom model
|
||||
## 3 Use custom model
|
||||
When the built-in model cannot meet the needs, you need to use your own trained model.
|
||||
First, refer to the first section of [inference_en.md](./inference_en.md) to convert your det and rec models to inference models, and then use them as follows.
|
||||
|
||||
### 1. Use by code
|
||||
### 3.1 Use by code
|
||||
|
||||
```python
|
||||
from paddleocr import PaddleOCR,draw_ocr
|
||||
|
@ -265,17 +267,17 @@ im_show = Image.fromarray(im_show)
|
|||
im_show.save('result.jpg')
|
||||
```
|
||||
|
||||
### Use by command line
|
||||
### 3.2 Use by command line
|
||||
|
||||
```bash
|
||||
paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --det_model_dir {your_det_model_dir} --rec_model_dir {your_rec_model_dir} --rec_char_dict_path {your_rec_char_dict_path} --cls_model_dir {your_cls_model_dir} --use_angle_cls true
|
||||
```
|
||||
|
||||
### Use web images or numpy array as input
|
||||
## 4 Use web images or numpy array as input
|
||||
|
||||
1. Web image
|
||||
### 4.1 Web image
|
||||
|
||||
Use by code
|
||||
- Use by code
|
||||
```python
|
||||
from paddleocr import PaddleOCR, draw_ocr
|
||||
ocr = PaddleOCR(use_angle_cls=True, lang="ch") # need to run only once to download and load model into memory
|
||||
|
@ -294,12 +296,12 @@ im_show = draw_ocr(image, boxes, txts, scores, font_path='/path/to/PaddleOCR/doc
|
|||
im_show = Image.fromarray(im_show)
|
||||
im_show.save('result.jpg')
|
||||
```
|
||||
Use by command line
|
||||
- Use by command line
|
||||
```bash
|
||||
paddleocr --image_dir http://n.sinaimg.cn/ent/transform/w630h933/20171222/o111-fypvuqf1838418.jpg --use_angle_cls=true
|
||||
```
|
||||
|
||||
2. Numpy array
|
||||
### 4.2 Numpy array
|
||||
A numpy array is supported as input only when paddleocr is used by code.
|
||||
|
||||
```python
|
||||
|
@ -324,7 +326,7 @@ im_show.save('result.jpg')
|
|||
```
|
||||
|
||||
|
||||
## Parameter Description
|
||||
## 5 Parameter Description
|
||||
|
||||
| Parameter | Description | Default value |
|
||||
|-------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------|
|
||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
After Width: | Height: | Size: 65 KiB |
Binary file not shown.
After Width: | Height: | Size: 63 KiB |
Binary file not shown.
After Width: | Height: | Size: 61 KiB |
BIN
doc/joinus.PNG
BIN
doc/joinus.PNG
Binary file not shown.
Before Width: | Height: | Size: 107 KiB After Width: | Height: | Size: 109 KiB |
|
@ -146,7 +146,8 @@ def parse_args(mMain=True, add_help=True):
|
|||
# DB parmas
|
||||
parser.add_argument("--det_db_thresh", type=float, default=0.3)
|
||||
parser.add_argument("--det_db_box_thresh", type=float, default=0.5)
|
||||
parser.add_argument("--det_db_unclip_ratio", type=float, default=2.0)
|
||||
parser.add_argument("--det_db_unclip_ratio", type=float, default=1.6)
|
||||
parser.add_argument("--use_dilation", type=bool, default=False)
|
||||
|
||||
# EAST parmas
|
||||
parser.add_argument("--det_east_score_thresh", type=float, default=0.8)
|
||||
|
@ -193,7 +194,8 @@ def parse_args(mMain=True, add_help=True):
|
|||
det_limit_type='max',
|
||||
det_db_thresh=0.3,
|
||||
det_db_box_thresh=0.5,
|
||||
det_db_unclip_ratio=2.0,
|
||||
det_db_unclip_ratio=1.6,
|
||||
use_dilation=False,
|
||||
det_east_score_thresh=0.8,
|
||||
det_east_cover_thresh=0.1,
|
||||
det_east_nms_thresh=0.2,
|
||||
|
|
|
@ -33,7 +33,7 @@ import paddle.distributed as dist
|
|||
|
||||
from ppocr.data.imaug import transform, create_operators
|
||||
from ppocr.data.simple_dataset import SimpleDataSet
|
||||
from ppocr.data.lmdb_dataset import LMDBDateSet
|
||||
from ppocr.data.lmdb_dataset import LMDBDataSet
|
||||
|
||||
__all__ = ['build_dataloader', 'transform', 'create_operators']
|
||||
|
||||
|
@ -51,20 +51,21 @@ signal.signal(signal.SIGINT, term_mp)
|
|||
signal.signal(signal.SIGTERM, term_mp)
|
||||
|
||||
|
||||
def build_dataloader(config, mode, device, logger):
|
||||
def build_dataloader(config, mode, device, logger, seed=None):
|
||||
config = copy.deepcopy(config)
|
||||
|
||||
support_dict = ['SimpleDataSet', 'LMDBDateSet']
|
||||
support_dict = ['SimpleDataSet', 'LMDBDataSet']
|
||||
module_name = config[mode]['dataset']['name']
|
||||
assert module_name in support_dict, Exception(
|
||||
'DataSet only support {}'.format(support_dict))
|
||||
assert mode in ['Train', 'Eval', 'Test'
|
||||
], "Mode should be Train, Eval or Test."
|
||||
|
||||
dataset = eval(module_name)(config, mode, logger)
|
||||
dataset = eval(module_name)(config, mode, logger, seed)
|
||||
loader_config = config[mode]['loader']
|
||||
batch_size = loader_config['batch_size_per_card']
|
||||
drop_last = loader_config['drop_last']
|
||||
shuffle = loader_config['shuffle']
|
||||
num_workers = loader_config['num_workers']
|
||||
if 'use_shared_memory' in loader_config.keys():
|
||||
use_shared_memory = loader_config['use_shared_memory']
|
||||
|
@ -75,14 +76,14 @@ def build_dataloader(config, mode, device, logger):
|
|||
batch_sampler = DistributedBatchSampler(
|
||||
dataset=dataset,
|
||||
batch_size=batch_size,
|
||||
shuffle=False,
|
||||
shuffle=shuffle,
|
||||
drop_last=drop_last)
|
||||
else:
|
||||
#Distribute data to single card
|
||||
batch_sampler = BatchSampler(
|
||||
dataset=dataset,
|
||||
batch_size=batch_size,
|
||||
shuffle=False,
|
||||
shuffle=shuffle,
|
||||
drop_last=drop_last)
|
||||
|
||||
data_loader = DataLoader(
|
||||
|
|
|
@ -21,7 +21,7 @@ from .make_border_map import MakeBorderMap
|
|||
from .make_shrink_map import MakeShrinkMap
|
||||
from .random_crop_data import EastRandomCropData, PSERandomCrop
|
||||
|
||||
from .rec_img_aug import RecAug, RecResizeImg, ClsResizeImg
|
||||
from .rec_img_aug import RecAug, RecResizeImg, ClsResizeImg, SRNRecResizeImg
|
||||
from .randaugment import RandAugment
|
||||
from .operators import *
|
||||
from .label_ops import *
|
||||
|
|
|
@ -18,6 +18,7 @@ from __future__ import print_function
|
|||
from __future__ import unicode_literals
|
||||
|
||||
import numpy as np
|
||||
import string
|
||||
|
||||
|
||||
class ClsLabelEncode(object):
|
||||
|
@ -92,18 +93,28 @@ class BaseRecLabelEncode(object):
|
|||
character_type='ch',
|
||||
use_space_char=False):
|
||||
support_character_type = [
|
||||
'ch', 'en', 'en_sensitive', 'french', 'german', 'japan', 'korean'
|
||||
'ch', 'en', 'EN_symbol', 'french', 'german', 'japan', 'korean',
|
||||
'EN', 'it', 'xi', 'pu', 'ru', 'ar', 'ta', 'ug', 'fa', 'ur', 'rs',
|
||||
'oc', 'rsc', 'bg', 'uk', 'be', 'te', 'ka', 'chinese_cht', 'hi',
|
||||
'mr', 'ne'
|
||||
]
|
||||
assert character_type in support_character_type, "Only {} are supported now but get {}".format(
|
||||
support_character_type, character_type)
|
||||
|
||||
self.max_text_len = max_text_length
|
||||
self.beg_str = "sos"
|
||||
self.end_str = "eos"
|
||||
if character_type == "en":
|
||||
self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
|
||||
dict_character = list(self.character_str)
|
||||
elif character_type in ["ch", "french", "german", "japan", "korean"]:
|
||||
elif character_type == "EN_symbol":
|
||||
# same with ASTER setting (use 94 char).
|
||||
self.character_str = string.printable[:-6]
|
||||
dict_character = list(self.character_str)
|
||||
elif character_type in support_character_type:
|
||||
self.character_str = ""
|
||||
assert character_dict_path is not None, "character_dict_path should not be None when character_type is ch"
|
||||
assert character_dict_path is not None, "character_dict_path should not be None when character_type is {}".format(
|
||||
character_type)
|
||||
with open(character_dict_path, "rb") as fin:
|
||||
lines = fin.readlines()
|
||||
for line in lines:
|
||||
|
@ -112,11 +123,6 @@ class BaseRecLabelEncode(object):
|
|||
if use_space_char:
|
||||
self.character_str += " "
|
||||
dict_character = list(self.character_str)
|
||||
elif character_type == "en_sensitive":
|
||||
# same with ASTER setting (use 94 char).
|
||||
import string
|
||||
self.character_str = string.printable[:-6]
|
||||
dict_character = list(self.character_str)
|
||||
self.character_type = character_type
|
||||
dict_character = self.add_special_char(dict_character)
|
||||
self.dict = {}
|
||||
|
@ -193,16 +199,76 @@ class AttnLabelEncode(BaseRecLabelEncode):
|
|||
super(AttnLabelEncode,
|
||||
self).__init__(max_text_length, character_dict_path,
|
||||
character_type, use_space_char)
|
||||
self.beg_str = "sos"
|
||||
self.end_str = "eos"
|
||||
|
||||
def add_special_char(self, dict_character):
|
||||
dict_character = [self.beg_str, self.end_str] + dict_character
|
||||
self.beg_str = "sos"
|
||||
self.end_str = "eos"
|
||||
dict_character = [self.beg_str] + dict_character + [self.end_str]
|
||||
return dict_character
|
||||
|
||||
def __call__(self, text):
|
||||
def __call__(self, data):
|
||||
text = data['label']
|
||||
text = self.encode(text)
|
||||
return text
|
||||
if text is None:
|
||||
return None
|
||||
if len(text) >= self.max_text_len:
|
||||
return None
|
||||
data['length'] = np.array(len(text))
|
||||
text = [0] + text + [len(self.character) - 1] + [0] * (self.max_text_len
|
||||
- len(text) - 2)
|
||||
data['label'] = np.array(text)
|
||||
return data
|
||||
|
||||
def get_ignored_tokens(self):
|
||||
beg_idx = self.get_beg_end_flag_idx("beg")
|
||||
end_idx = self.get_beg_end_flag_idx("end")
|
||||
return [beg_idx, end_idx]
|
||||
|
||||
def get_beg_end_flag_idx(self, beg_or_end):
|
||||
if beg_or_end == "beg":
|
||||
idx = np.array(self.dict[self.beg_str])
|
||||
elif beg_or_end == "end":
|
||||
idx = np.array(self.dict[self.end_str])
|
||||
else:
|
||||
assert False, "Unsupport type %s in get_beg_end_flag_idx" \
|
||||
% beg_or_end
|
||||
return idx
|
||||
|
||||
|
||||
class SRNLabelEncode(BaseRecLabelEncode):
|
||||
""" Convert between text-label and text-index """
|
||||
|
||||
def __init__(self,
|
||||
max_text_length=25,
|
||||
character_dict_path=None,
|
||||
character_type='en',
|
||||
use_space_char=False,
|
||||
**kwargs):
|
||||
super(SRNLabelEncode,
|
||||
self).__init__(max_text_length, character_dict_path,
|
||||
character_type, use_space_char)
|
||||
|
||||
def add_special_char(self, dict_character):
|
||||
dict_character = dict_character + [self.beg_str, self.end_str]
|
||||
return dict_character
|
||||
|
||||
def __call__(self, data):
|
||||
text = data['label']
|
||||
text = self.encode(text)
|
||||
char_num = len(self.character)
|
||||
if text is None:
|
||||
return None
|
||||
if len(text) > self.max_text_len:
|
||||
return None
|
||||
data['length'] = np.array(len(text))
|
||||
text = text + [char_num - 1] * (self.max_text_len - len(text))
|
||||
data['label'] = np.array(text)
|
||||
return data
|
||||
|
||||
def get_ignored_tokens(self):
|
||||
beg_idx = self.get_beg_end_flag_idx("beg")
|
||||
end_idx = self.get_beg_end_flag_idx("end")
|
||||
return [beg_idx, end_idx]
|
||||
|
||||
def get_beg_end_flag_idx(self, beg_or_end):
|
||||
if beg_or_end == "beg":
|
||||
|
|
|
@ -32,7 +32,6 @@ class MakeShrinkMap(object):
|
|||
text_polys, ignore_tags = self.validate_polygons(text_polys,
|
||||
ignore_tags, h, w)
|
||||
gt = np.zeros((h, w), dtype=np.float32)
|
||||
# gt = np.zeros((1, h, w), dtype=np.float32)
|
||||
mask = np.ones((h, w), dtype=np.float32)
|
||||
for i in range(len(text_polys)):
|
||||
polygon = text_polys[i]
|
||||
|
@ -44,21 +43,34 @@ class MakeShrinkMap(object):
|
|||
ignore_tags[i] = True
|
||||
else:
|
||||
polygon_shape = Polygon(polygon)
|
||||
distance = polygon_shape.area * (
|
||||
1 - np.power(self.shrink_ratio, 2)) / polygon_shape.length
|
||||
subject = [tuple(l) for l in text_polys[i]]
|
||||
subject = [tuple(l) for l in polygon]
|
||||
padding = pyclipper.PyclipperOffset()
|
||||
padding.AddPath(subject, pyclipper.JT_ROUND,
|
||||
pyclipper.ET_CLOSEDPOLYGON)
|
||||
shrinked = padding.Execute(-distance)
|
||||
shrinked = []
|
||||
|
||||
# Increase the shrink ratio every time we get multiple polygon returned back
|
||||
possible_ratios = np.arange(self.shrink_ratio, 1,
|
||||
self.shrink_ratio)
|
||||
np.append(possible_ratios, 1)
|
||||
# print(possible_ratios)
|
||||
for ratio in possible_ratios:
|
||||
# print(f"Change shrink ratio to {ratio}")
|
||||
distance = polygon_shape.area * (
|
||||
1 - np.power(ratio, 2)) / polygon_shape.length
|
||||
shrinked = padding.Execute(-distance)
|
||||
if len(shrinked) == 1:
|
||||
break
|
||||
|
||||
if shrinked == []:
|
||||
cv2.fillPoly(mask,
|
||||
polygon.astype(np.int32)[np.newaxis, :, :], 0)
|
||||
ignore_tags[i] = True
|
||||
continue
|
||||
shrinked = np.array(shrinked[0]).reshape(-1, 2)
|
||||
cv2.fillPoly(gt, [shrinked.astype(np.int32)], 1)
|
||||
# cv2.fillPoly(gt[0], [shrinked.astype(np.int32)], 1)
|
||||
|
||||
for each_shirnk in shrinked:
|
||||
shirnk = np.array(each_shirnk).reshape(-1, 2)
|
||||
cv2.fillPoly(gt, [shirnk.astype(np.int32)], 1)
|
||||
|
||||
data['shrink_map'] = gt
|
||||
data['shrink_mask'] = mask
|
||||
|
@ -84,11 +96,12 @@ class MakeShrinkMap(object):
|
|||
return polygons, ignore_tags
|
||||
|
||||
def polygon_area(self, polygon):
|
||||
# return cv2.contourArea(polygon.astype(np.float32))
|
||||
edge = 0
|
||||
for i in range(polygon.shape[0]):
|
||||
next_index = (i + 1) % polygon.shape[0]
|
||||
edge += (polygon[next_index, 0] - polygon[i, 0]) * (
|
||||
polygon[next_index, 1] - polygon[i, 1])
|
||||
|
||||
return edge / 2.
|
||||
"""
|
||||
compute polygon area
|
||||
"""
|
||||
area = 0
|
||||
q = polygon[-1]
|
||||
for p in polygon:
|
||||
area += p[0] * q[1] - p[1] * q[0]
|
||||
q = p
|
||||
return area / 2.0
|
||||
|
|
|
@ -185,8 +185,8 @@ class DetResizeForTest(object):
|
|||
resize_h = int(h * ratio)
|
||||
resize_w = int(w * ratio)
|
||||
|
||||
resize_h = int(round(resize_h / 32) * 32)
|
||||
resize_w = int(round(resize_w / 32) * 32)
|
||||
resize_h = max(int(round(resize_h / 32) * 32), 32)
|
||||
resize_w = max(int(round(resize_w / 32) * 32), 32)
|
||||
|
||||
try:
|
||||
if int(resize_w) <= 0 or int(resize_h) <= 0:
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue