fix conflicts
This commit is contained in:
commit
c94428a880
|
@ -106,7 +106,7 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
getStr = lambda strId: self.stringBundle.getString(strId)
|
||||
|
||||
self.defaultSaveDir = defaultSaveDir
|
||||
self.ocr = PaddleOCR(use_pdserving=False, use_angle_cls=True, det=True, cls=True, use_gpu=True, lang=lang)
|
||||
self.ocr = PaddleOCR(use_pdserving=False, use_angle_cls=True, det=True, cls=True, use_gpu=False, lang=lang)
|
||||
|
||||
if os.path.exists('./data/paddle.png'):
|
||||
result = self.ocr.ocr('./data/paddle.png', cls=True, det=True)
|
||||
|
@ -274,6 +274,7 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
self.preButton.setIconSize(QSize(40, 100))
|
||||
self.preButton.clicked.connect(self.openPrevImg)
|
||||
self.preButton.setStyleSheet('border: none;')
|
||||
self.preButton.setShortcut('a')
|
||||
self.iconlist = QListWidget()
|
||||
self.iconlist.setViewMode(QListView.IconMode)
|
||||
self.iconlist.setFlow(QListView.TopToBottom)
|
||||
|
@ -289,12 +290,12 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
self.nextButton.setIconSize(QSize(40, 100))
|
||||
self.nextButton.setStyleSheet('border: none;')
|
||||
self.nextButton.clicked.connect(self.openNextImg)
|
||||
self.nextButton.setShortcut('d')
|
||||
|
||||
hlayout.addWidget(self.preButton)
|
||||
hlayout.addWidget(self.iconlist)
|
||||
hlayout.addWidget(self.nextButton)
|
||||
|
||||
# self.setLayout(hlayout)
|
||||
|
||||
iconListContainer = QWidget()
|
||||
iconListContainer.setLayout(hlayout)
|
||||
|
@ -359,11 +360,6 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
opendir = action(getStr('openDir'), self.openDirDialog,
|
||||
'Ctrl+u', 'open', getStr('openDir'))
|
||||
|
||||
openNextImg = action(getStr('nextImg'), self.openNextImg,
|
||||
'd', 'next', getStr('nextImgDetail'))
|
||||
|
||||
openPrevImg = action(getStr('prevImg'), self.openPrevImg,
|
||||
'a', 'prev', getStr('prevImgDetail'))
|
||||
|
||||
save = action(getStr('save'), self.saveFile,
|
||||
'Ctrl+V', 'verify', getStr('saveDetail'), enabled=False)
|
||||
|
@ -371,7 +367,7 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
alcm = action(getStr('choosemodel'), self.autolcm,
|
||||
'Ctrl+M', 'next', getStr('tipchoosemodel'))
|
||||
|
||||
deleteImg = action(getStr('deleteImg'), self.deleteImg, 'Ctrl+D', 'close', getStr('deleteImgDetail'),
|
||||
deleteImg = action(getStr('deleteImg'), self.deleteImg, 'Ctrl+Shift+D', 'close', getStr('deleteImgDetail'),
|
||||
enabled=True)
|
||||
|
||||
resetAll = action(getStr('resetAll'), self.resetAll, None, 'resetall', getStr('resetAllDetail'))
|
||||
|
@ -388,7 +384,7 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
'w', 'new', getStr('crtBoxDetail'), enabled=False)
|
||||
|
||||
delete = action(getStr('delBox'), self.deleteSelectedShape,
|
||||
'Delete', 'delete', getStr('delBoxDetail'), enabled=False)
|
||||
'backspace', 'delete', getStr('delBoxDetail'), enabled=False)
|
||||
copy = action(getStr('dupBox'), self.copySelectedShape,
|
||||
'Ctrl+C', 'copy', getStr('dupBoxDetail'),
|
||||
enabled=False)
|
||||
|
@ -446,8 +442,11 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
reRec = action(getStr('reRecognition'), self.reRecognition,
|
||||
'Ctrl+Shift+R', 'reRec', getStr('reRecognition'), enabled=False)
|
||||
|
||||
singleRere = action(getStr('singleRe'), self.singleRerecognition,
|
||||
'Ctrl+R', 'reRec', getStr('singleRe'), enabled=False)
|
||||
|
||||
createpoly = action(getStr('creatPolygon'), self.createPolygon,
|
||||
'p', 'new', 'Creat Polygon', enabled=True)
|
||||
'q', 'new', 'Creat Polygon', enabled=True)
|
||||
|
||||
saveRec = action(getStr('saveRec'), self.saveRecResult,
|
||||
'', 'save', getStr('saveRec'), enabled=False)
|
||||
|
@ -491,6 +490,7 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
icon='color', tip=getStr('shapeFillColorDetail'),
|
||||
enabled=False)
|
||||
|
||||
|
||||
# Label list context menu.
|
||||
labelMenu = QMenu()
|
||||
addActions(labelMenu, (edit, delete))
|
||||
|
@ -501,7 +501,6 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
|
||||
# Draw squares/rectangles
|
||||
self.drawSquaresOption = QAction(getStr('drawSquares'), self)
|
||||
self.drawSquaresOption.setShortcut('Ctrl+Shift+R')
|
||||
self.drawSquaresOption.setCheckable(True)
|
||||
self.drawSquaresOption.setChecked(settings.get(SETTING_DRAW_SQUARE, False))
|
||||
self.drawSquaresOption.triggered.connect(self.toogleDrawSquare)
|
||||
|
@ -509,7 +508,7 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
# Store actions for further handling.
|
||||
self.actions = struct(save=save, open=open, resetAll=resetAll, deleteImg=deleteImg,
|
||||
lineColor=color1, create=create, delete=delete, edit=edit, copy=copy,
|
||||
saveRec=saveRec,
|
||||
saveRec=saveRec, singleRere=singleRere,AutoRec=AutoRec,reRec=reRec,
|
||||
createMode=createMode, editMode=editMode,
|
||||
shapeLineColor=shapeLineColor, shapeFillColor=shapeFillColor,
|
||||
zoom=zoom, zoomIn=zoomIn, zoomOut=zoomOut, zoomOrg=zoomOrg,
|
||||
|
@ -518,9 +517,9 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
fileMenuActions=(
|
||||
open, opendir, saveLabel, resetAll, quit),
|
||||
beginner=(), advanced=(),
|
||||
editMenu=(createpoly, edit, copy, delete,
|
||||
editMenu=(createpoly, edit, copy, delete,singleRere,
|
||||
None, color1, self.drawSquaresOption),
|
||||
beginnerContext=(create, edit, copy, delete),
|
||||
beginnerContext=(create, edit, copy, delete, singleRere),
|
||||
advancedContext=(createMode, editMode, edit, copy,
|
||||
delete, shapeLineColor, shapeFillColor),
|
||||
onLoadActive=(
|
||||
|
@ -562,7 +561,7 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
zoomIn, zoomOut, zoomOrg, None,
|
||||
fitWindow, fitWidth))
|
||||
|
||||
addActions(self.menus.autolabel, (alcm, None, help)) #
|
||||
addActions(self.menus.autolabel, (AutoRec, reRec, alcm, None, help)) #
|
||||
|
||||
self.menus.file.aboutToShow.connect(self.updateFileMenu)
|
||||
|
||||
|
@ -572,6 +571,7 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
action('&Copy here', self.copyShape),
|
||||
action('&Move here', self.moveShape)))
|
||||
|
||||
|
||||
self.statusBar().showMessage('%s started.' % __appname__)
|
||||
self.statusBar().show()
|
||||
|
||||
|
@ -919,6 +919,7 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
self.actions.edit.setEnabled(selected)
|
||||
self.actions.shapeLineColor.setEnabled(selected)
|
||||
self.actions.shapeFillColor.setEnabled(selected)
|
||||
self.actions.singleRere.setEnabled(selected)
|
||||
|
||||
def addLabel(self, shape):
|
||||
shape.paintLabel = self.displayLabelOption.isChecked()
|
||||
|
@ -988,6 +989,19 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
self.updateComboBox()
|
||||
self.canvas.loadShapes(s)
|
||||
|
||||
def singleLabel(self, shape):
    """Refresh the two list entries that belong to a single shape.

    The entry mapped to ``shape`` in ``self.shapesToItems`` gets the shape's
    current label as its text, and the entry mapped to it in
    ``self.shapesToItemsbox`` gets the string form of the shape's integer
    corner coordinates. ``self.updateComboBox()`` is invoked after each of
    the two updates.

    Args:
        shape: The shape whose entries should be refreshed. ``None`` is
            accepted and makes the call a no-op.

    Raises:
        KeyError: If ``shape`` is missing from either mapping (assuming the
            mappings are plain dicts -- TODO confirm).
    """
    if shape is not None:
        # Label text entry.
        label_item = self.shapesToItems[shape]
        label_item.setText(shape.label)
        self.updateComboBox()

        # Box coordinate entry: points are truncated to ints for display.
        box_item = self.shapesToItemsbox[shape]
        corner_text = str([(int(pt.x()), int(pt.y())) for pt in shape.points])
        box_item.setText(corner_text)
        self.updateComboBox()
|
||||
|
||||
def updateComboBox(self):
|
||||
# Get the unique labels and add them to the Combobox.
|
||||
itemsTextList = [str(self.labelList.item(i).text()) for i in range(self.labelList.count())]
|
||||
|
@ -1441,6 +1455,8 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
self.haveAutoReced = False
|
||||
self.AutoRecognition.setEnabled(True)
|
||||
self.reRecogButton.setEnabled(True)
|
||||
self.actions.AutoRec.setEnabled(True)
|
||||
self.actions.reRec.setEnabled(True)
|
||||
self.actions.saveLabel.setEnabled(True)
|
||||
|
||||
|
||||
|
@ -1755,6 +1771,7 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
self.loadFile(self.filePath) # ADD
|
||||
self.haveAutoReced = True
|
||||
self.AutoRecognition.setEnabled(False)
|
||||
self.actions.AutoRec.setEnabled(False)
|
||||
self.setDirty()
|
||||
self.saveCacheLabel()
|
||||
|
||||
|
@ -1794,6 +1811,27 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
else:
|
||||
QMessageBox.information(self, "Information", "Draw a box!")
|
||||
|
||||
def singleRerecognition(self):
    """Re-run OCR on the currently selected box and update its label.

    The selected shape's four corner points are used to crop the image at
    ``self.filePath`` via ``get_rotate_crop_image``; the crop is passed to
    ``self.ocr.ocr(..., cls=True, det=False)``. When a non-empty text comes
    back and it differs from the shape's current label, the label is
    replaced, the list entries are refreshed through ``self.singleLabel``
    and the document is marked dirty.

    If nothing is selected the call is a no-op. If the crop fails, or the
    recogniser returns nothing / an empty string, an information dialog is
    shown and the shape is left untouched.

    Raises:
        AssertionError: If the selected shape does not have exactly four
            points (kept from the original implementation).
    """
    shape = self.canvas.selectedShape
    if shape is None:
        # Nothing to re-recognise; avoids an AttributeError on shape.points.
        return

    box = [[int(p.x()), int(p.y())] for p in shape.points]
    assert len(box) == 4

    img = cv2.imread(self.filePath)
    img_crop = get_rotate_crop_image(img, np.array(box, np.float32))

    result = None
    if img_crop is not None:
        result = self.ocr.ocr(img_crop, cls=True, det=False)

    # NOTE(review): with det=False the result is presumably a list of
    # (text, score) pairs -- confirm against the installed PaddleOCR version.
    text = result[0][0] if result else ''
    if text == '':
        # Compare by value, not identity: `is not ''` is unreliable. An empty
        # result previously fell through to result[1][0] and misbehaved.
        msg = 'Can not recognise the detection box in ' + self.filePath + '. Please change manually'
        QMessageBox.information(self, "Information", msg)
        return

    result.insert(0, box)
    print('result in reRec is ', result)

    if text == shape.label:
        print('label no change')
    else:
        shape.label = text
        self.singleLabel(shape)
        self.setDirty()
    print(box)
|
||||
|
||||
def autolcm(self):
|
||||
vbox = QVBoxLayout()
|
||||
|
@ -1825,6 +1863,7 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
self.dialog.exec_()
|
||||
if self.filePath:
|
||||
self.AutoRecognition.setEnabled(True)
|
||||
self.actions.AutoRec.setEnabled(True)
|
||||
|
||||
|
||||
def modelChoose(self):
|
||||
|
|
|
@ -6,6 +6,10 @@ PPOCRLabel is a semi-automatic graphic annotation tool suitable for OCR field. I
|
|||
|
||||
<img src="./data/gif/steps_en.gif" width="100%"/>
|
||||
|
||||
### Recent Update
|
||||
|
||||
- 2020.12.18: Support re-recognition of a single label box (by [ninetailskim](https://github.com/ninetailskim)), and improved shortcut keys.
|
||||
|
||||
## Installation
|
||||
|
||||
### 1. Install PaddleOCR
|
||||
|
@ -92,11 +96,30 @@ Therefore, if the recognition result has been manually changed before, it may ch
|
|||
|
||||
## Explanation
|
||||
|
||||
### Shortcut keys
|
||||
|
||||
| Shortcut keys | Description |
|
||||
| ---------------- | ------------------------------------------------ |
|
||||
| Ctrl + shift + A | Automatically label all unchecked images |
|
||||
| Ctrl + shift + R | Re-recognize all the labels of the current image |
|
||||
| W | Create a rect box |
|
||||
| Q | Create a four-points box |
|
||||
| Ctrl + E | Edit label of the selected box |
|
||||
| Ctrl + R | Re-recognize the selected box |
|
||||
| Backspace | Delete the selected box |
|
||||
| Ctrl + V | Check image |
|
||||
| Ctrl + Shift + d | Delete image |
|
||||
| D | Next image |
|
||||
| A | Previous image |
|
||||
| Ctrl++ | Zoom in |
|
||||
| Ctrl-- | Zoom out |
|
||||
| ↑→↓← | Move selected box |
|
||||
|
||||
### Built-in Model
|
||||
|
||||
- Default model: PPOCRLabel uses the Chinese and English ultra-lightweight OCR model in PaddleOCR by default, supports Chinese, English and number recognition, and multiple language detection.
|
||||
|
||||
- Model language switching: Changing the built-in model language is supportable by clicking "PaddleOCR"-"Choose OCR Model" in the menu bar. Currently supported languagesinclude French, German, Korean, and Japanese.
|
||||
- Model language switching: The built-in model language can be changed by clicking "PaddleOCR"-"Choose OCR Model" in the menu bar. Currently supported languages include French, German, Korean, and Japanese.
|
||||
For specific model download links, please refer to [PaddleOCR Model List](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_en/models_list_en.md#multilingual-recognition-modelupdating)
|
||||
|
||||
- Custom model: The model trained by users can be replaced by modifying PPOCRLabel.py in [PaddleOCR class instantiation](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/PPOCRLabel/PPOCRLabel.py#L110) referring [Custom Model Code](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_en/whl_en.md#use-custom-model)
|
||||
|
|
|
@ -6,6 +6,10 @@ PPOCRLabel是一款适用于OCR领域的半自动化图形标注工具,使用p
|
|||
|
||||
<img src="./data/gif/steps.gif" width="100%"/>
|
||||
|
||||
#### 近期更新
|
||||
|
||||
- 2020.12.18: 支持对单个标记框进行重新识别(by [ninetailskim](https://github.com/ninetailskim) ),完善快捷键。
|
||||
|
||||
## 安装
|
||||
|
||||
### 1. 安装PaddleOCR
|
||||
|
@ -72,6 +76,26 @@ python3 PPOCRLabel.py --lang ch
|
|||
| crop_img | 识别数据。按照检测框切割后的图片。与rec_gt.txt同时产生。 |
|
||||
|
||||
## 说明
|
||||
|
||||
### 快捷键
|
||||
|
||||
| 快捷键 | 说明 |
|
||||
| ---------------- | ---------------------------- |
|
||||
| Ctrl + shift + A | 自动标注所有未确认过的图片 |
|
||||
| Ctrl + shift + R | 对当前图片的所有标记重新识别 |
|
||||
| W | 新建矩形框 |
|
||||
| Q | 新建四点框 |
|
||||
| Ctrl + E | 编辑所选框标签 |
|
||||
| Ctrl + R | 重新识别所选标记 |
|
||||
| Backspace | 删除所选框 |
|
||||
| Ctrl + V | 确认本张图片标记 |
|
||||
| Ctrl + Shift + d | 删除本张图片 |
|
||||
| D | 下一张图片 |
|
||||
| A | 上一张图片 |
|
||||
| Ctrl++ | 放大 |
|
||||
| Ctrl-- | 缩小 |
|
||||
| ↑→↓← | 移动标记框 |
|
||||
|
||||
### 内置模型
|
||||
|
||||
- 默认模型:PPOCRLabel默认使用PaddleOCR中的中英文超轻量OCR模型,支持中英文与数字识别,多种语言检测。
|
||||
|
|
|
@ -46,8 +46,9 @@ class Worker(QThread):
|
|||
chars = res[1][0]
|
||||
cond = res[1][1]
|
||||
posi = res[0]
|
||||
strs += "Transcription: " + chars + " Probability: " + str(
|
||||
cond) + " Location: " + json.dumps(posi) + '\n'
|
||||
strs += "Transcription: " + chars + " Probability: " + str(cond) + \
|
||||
" Location: " + json.dumps(posi) +'\n'
|
||||
# Sending large amounts of data repeatedly through pyqtSignal may affect the program efficiency
|
||||
self.listValue.emit(strs)
|
||||
self.mainThread.result_dic = self.result_dic
|
||||
self.mainThread.filePath = Imgpath
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -94,4 +94,5 @@ ok=确认
|
|||
autolabeling=自动标注中
|
||||
hideBox=隐藏所有标注
|
||||
showBox=显示所有标注
|
||||
saveLabel=保存标记结果
|
||||
saveLabel=保存标记结果
|
||||
singleRe=重识别此区块
|
|
@ -1,70 +0,0 @@
|
|||
saveAsDetail=將標籤保存到其他文件
|
||||
changeSaveDir=改變存放目錄
|
||||
openFile=開啟檔案
|
||||
shapeLineColorDetail=更改線條顏色
|
||||
resetAll=重置
|
||||
crtBox=創建區塊
|
||||
crtBoxDetail=畫一個區塊
|
||||
dupBoxDetail=複製區塊
|
||||
verifyImg=驗證圖像
|
||||
zoominDetail=放大
|
||||
verifyImgDetail=驗證圖像
|
||||
saveDetail=將標籤存到
|
||||
openFileDetail=打開圖像
|
||||
fitWidthDetail=調整到窗口寬度
|
||||
tutorial=YouTube教學
|
||||
editLabel=編輯標籤
|
||||
openAnnotationDetail=打開標籤文件
|
||||
quit=結束
|
||||
shapeFillColorDetail=更改填充顏色
|
||||
closeCurDetail=關閉目前檔案
|
||||
closeCur=關閉
|
||||
deleteImg=刪除圖像
|
||||
deleteImgDetail=刪除目前圖像
|
||||
fitWin=調整到跟窗口一樣大小
|
||||
delBox=刪除選取區塊
|
||||
boxLineColorDetail=選擇框線顏色
|
||||
originalsize=原始大小
|
||||
resetAllDetail=重設所有設定
|
||||
zoomoutDetail=畫面放大
|
||||
save=儲存
|
||||
saveAs=另存為
|
||||
fitWinDetail=縮放到窗口一樣
|
||||
openDir=開啟目錄
|
||||
copyPrevBounding=複製當前圖像中的上一個邊界框
|
||||
showHide=顯示/隱藏標籤
|
||||
changeSaveFormat=更改儲存格式
|
||||
shapeFillColor=填充顏色
|
||||
quitApp=離開本程式
|
||||
dupBox=複製區塊
|
||||
delBoxDetail=刪除區塊
|
||||
zoomin=放大畫面
|
||||
info=資訊
|
||||
openAnnotation=開啟標籤
|
||||
prevImgDetail=上一個圖像
|
||||
fitWidth=縮放到跟畫面一樣寬
|
||||
zoomout=縮小畫面
|
||||
changeSavedAnnotationDir=更改預設標籤存的目錄
|
||||
nextImgDetail=下一個圖像
|
||||
originalsizeDetail=放大到原始大小
|
||||
prevImg=上一個圖像
|
||||
tutorialDetail=顯示示範內容
|
||||
shapeLineColor=形狀線條顏色
|
||||
boxLineColor=日期分隔線顏色
|
||||
editLabelDetail=修改所選區塊的標籤
|
||||
nextImg=下一張圖片
|
||||
useDefaultLabel=使用預設標籤
|
||||
useDifficult=有難度的
|
||||
boxLabelText=區塊的標籤
|
||||
labels=標籤
|
||||
autoSaveMode=自動儲存模式
|
||||
singleClsMode=單一類別模式
|
||||
displayLabel=顯示類別
|
||||
fileList=檔案清單
|
||||
files=檔案
|
||||
iconList=XX
|
||||
icon=XX
|
||||
advancedMode=進階模式
|
||||
advancedModeDetail=切到進階模式
|
||||
showAllBoxDetail=顯示所有區塊
|
||||
hideAllBoxDetail=隱藏所有區塊
|
|
@ -94,4 +94,5 @@ ok=OK
|
|||
autolabeling=Automatic Labeling
|
||||
hideBox=Hide All Box
|
||||
showBox=Show All Box
|
||||
saveLabel=Save Label
|
||||
saveLabel=Save Label
|
||||
singleRe=Re-recognition RectBox
|
|
@ -122,8 +122,7 @@ For a new language request, please refer to [Guideline for new language_requests
|
|||
<img src="./doc/ppocr_framework.png" width="800">
|
||||
</div>
|
||||
|
||||
PP-OCR is a practical ultra-lightweight OCR system. It is mainly composed of three parts: DB text detection, detection frame correction and CRNN text recognition. The system adopts 19 effective strategies from 8 aspects including backbone network selection and adjustment, prediction head design, data augmentation, learning rate transformation strategy, regularization parameter selection, pre-training model use, and automatic model tailoring and quantization to optimize and slim down the models of each module. The final results are an ultra-lightweight Chinese and English OCR model with an overall size of 3.5M and a 2.8M English digital OCR model. For more details, please refer to the PP-OCR technical article (https://arxiv.org/abs/2009.09941). Besides, The implementation of the FPGM Pruner and PACT quantization is based on [PaddleSlim](https://github.com/PaddlePaddle/PaddleSlim).
|
||||
|
||||
PP-OCR is a practical ultra-lightweight OCR system. It is mainly composed of three parts: DB text detection[2], detection frame correction and CRNN text recognition[7]. The system adopts 19 effective strategies from 8 aspects including backbone network selection and adjustment, prediction head design, data augmentation, learning rate transformation strategy, regularization parameter selection, pre-training model use, and automatic model tailoring and quantization to optimize and slim down the models of each module. The final results are an ultra-lightweight Chinese and English OCR model with an overall size of 3.5M and a 2.8M English digital OCR model. For more details, please refer to the PP-OCR technical article (https://arxiv.org/abs/2009.09941). Besides, the implementation of the FPGM Pruner [8] and PACT quantization [9] is based on [PaddleSlim](https://github.com/PaddlePaddle/PaddleSlim).
|
||||
|
||||
|
||||
## Visualization [more](./doc/doc_en/visualization_en.md)
|
||||
|
|
|
@ -115,7 +115,7 @@ PaddleOCR同时支持动态图与静态图两种编程范式
|
|||
<img src="./doc/ppocr_framework.png" width="800">
|
||||
</div>
|
||||
|
||||
PP-OCR是一个实用的超轻量OCR系统。主要由DB文本检测、检测框矫正和CRNN文本识别三部分组成。该系统从骨干网络选择和调整、预测头部的设计、数据增强、学习率变换策略、正则化参数选择、预训练模型使用以及模型自动裁剪量化8个方面,采用19个有效策略,对各个模块的模型进行效果调优和瘦身,最终得到整体大小为3.5M的超轻量中英文OCR和2.8M的英文数字OCR。更多细节请参考PP-OCR技术方案 https://arxiv.org/abs/2009.09941 。其中FPGM裁剪器和PACT量化的实现可以参考[PaddleSlim](https://github.com/PaddlePaddle/PaddleSlim)。
|
||||
PP-OCR是一个实用的超轻量OCR系统。主要由DB文本检测[2]、检测框矫正和CRNN文本识别三部分组成[7]。该系统从骨干网络选择和调整、预测头部的设计、数据增强、学习率变换策略、正则化参数选择、预训练模型使用以及模型自动裁剪量化8个方面,采用19个有效策略,对各个模块的模型进行效果调优和瘦身,最终得到整体大小为3.5M的超轻量中英文OCR和2.8M的英文数字OCR。更多细节请参考PP-OCR技术方案 https://arxiv.org/abs/2009.09941 。其中FPGM裁剪器[8]和PACT量化[9]的实现可以参考[PaddleSlim](https://github.com/PaddlePaddle/PaddleSlim)。
|
||||
|
||||
<a name="效果展示"></a>
|
||||
## 效果展示 [more](./doc/doc_ch/visualization.md)
|
||||
|
|
|
@ -22,7 +22,7 @@ English | [简体中文](README_ch.md)
|
|||
</div>
|
||||
|
||||
|
||||
The Style-Text data synthesis tool is a tool based on Baidu's self-developed text editing algorithm "Editing Text in the Wild" [https://arxiv.org/abs/1908.03047](https://arxiv.org/abs/1908.03047).
|
||||
The Style-Text data synthesis tool is a tool based on Baidu and HUST cooperation research work, "Editing Text in the Wild" [https://arxiv.org/abs/1908.03047](https://arxiv.org/abs/1908.03047).
|
||||
|
||||
Different from the commonly used GAN-based data synthesis tools, the main framework of Style-Text includes:
|
||||
* (1) Text foreground style transfer module.
|
||||
|
@ -69,10 +69,15 @@ fusion_generator:
|
|||
1. You can run `tools/synth_image` and generate the demo image, which is saved in the current folder.
|
||||
|
||||
```python
|
||||
python3 -m tools.synth_image -c configs/config.yml --style_image examples/style_images/2.jpg --text_corpus PaddleOCR --language en
|
||||
python3 tools/synth_image.py -c configs/config.yml --style_image examples/style_images/2.jpg --text_corpus PaddleOCR --language en
|
||||
```
|
||||
|
||||
* Note: The language options is correspond to the corpus. Currently, the tool only supports English, Simplified Chinese and Korean.
|
||||
* Note 1: The language option must correspond to the corpus. Currently, the tool only supports English, Simplified Chinese and Korean.
|
||||
* Note 2: Style-Text is mainly used to generate images for OCR recognition models.
|
||||
So the height of style images should be around 32 pixels. Images in other sizes may behave poorly.
|
||||
* Note 3: You can modify `use_gpu` in `configs/config.yml` to determine whether to use GPU for prediction.
|
||||
|
||||
|
||||
|
||||
For example, enter the following image and corpus `PaddleOCR`.
|
||||
|
||||
|
@ -136,9 +141,21 @@ We provide a general dataset containing Chinese, English and Korean (50,000 imag
|
|||
2. You can run the following command to start synthesis task:
|
||||
|
||||
``` bash
|
||||
python3 -m tools.synth_dataset.py -c configs/dataset_config.yml
|
||||
python3 tools/synth_dataset.py -c configs/dataset_config.yml
|
||||
```
|
||||
|
||||
We also provide example corpus and images in `examples` folder.
|
||||
<div align="center">
|
||||
<img src="examples/style_images/1.jpg" width="300">
|
||||
<img src="examples/style_images/2.jpg" width="300">
|
||||
</div>
|
||||
If you run the code above directly, you will get example output data in `output_data` folder.
|
||||
You will get synthesis images and labels as below:
|
||||
<div align="center">
|
||||
<img src="doc/images/12.png" width="800">
|
||||
</div>
|
||||
There will be some cache under the `label` folder. If the program exits unexpectedly, you can find the cached labels there.
|
||||
When the program finishes normally, you will find all the labels in `label.txt`, which gives the final results.
|
||||
|
||||
<a name="Applications"></a>
|
||||
### Applications
|
||||
|
|
|
@ -21,7 +21,7 @@
|
|||
</div>
|
||||
|
||||
|
||||
Style-Text数据合成工具是基于百度自研的文本编辑算法《Editing Text in the Wild》https://arxiv.org/abs/1908.03047
|
||||
Style-Text数据合成工具是基于百度和华科合作研发的文本编辑算法《Editing Text in the Wild》https://arxiv.org/abs/1908.03047
|
||||
|
||||
不同于常用的基于GAN的数据合成工具,Style-Text主要框架包括:1.文本前景风格迁移模块 2.背景抽取模块 3.融合模块。经过这样三步,就可以迅速实现图像文本风格迁移。下图是一些该数据合成工具效果图。
|
||||
|
||||
|
@ -61,9 +61,13 @@ fusion_generator:
|
|||
输入一张风格图和一段文字语料,运行tools/synth_image,合成单张图片,结果图像保存在当前目录下:
|
||||
|
||||
```python
|
||||
python3 -m tools.synth_image -c configs/config.yml --style_image examples/style_images/2.jpg --text_corpus PaddleOCR --language en
|
||||
python3 tools/synth_image.py -c configs/config.yml --style_image examples/style_images/2.jpg --text_corpus PaddleOCR --language en
|
||||
```
|
||||
* 注意:语言选项和语料相对应,目前该工具只支持英文、简体中文和韩语。
|
||||
* 注1:语言选项和语料相对应,目前该工具只支持英文、简体中文和韩语。
|
||||
* 注2:Style-Text生成的数据主要应用于OCR识别场景。基于当前PaddleOCR识别模型的设计,我们主要支持高度在32左右的风格图像。
|
||||
如果输入图像尺寸相差过多,效果可能不佳。
|
||||
* 注3:可以通过修改配置文件中的`use_gpu`(true或者false)参数来决定是否使用GPU进行预测。
|
||||
|
||||
|
||||
例如,输入如下图片和语料"PaddleOCR":
|
||||
|
||||
|
@ -124,8 +128,21 @@ python3 -m tools.synth_image -c configs/config.yml --style_image examples/style_
|
|||
2. 运行`tools/synth_dataset`合成数据:
|
||||
|
||||
``` bash
|
||||
python3 -m tools.synth_dataset -c configs/dataset_config.yml
|
||||
python3 tools/synth_dataset.py -c configs/dataset_config.yml
|
||||
```
|
||||
我们在examples目录下提供了样例图片和语料。
|
||||
<div align="center">
|
||||
<img src="examples/style_images/1.jpg" width="300">
|
||||
<img src="examples/style_images/2.jpg" width="300">
|
||||
</div>
|
||||
|
||||
直接运行上述命令,可以在output_data中产生样例输出,包括图片和用于训练识别模型的标注文件:
|
||||
<div align="center">
|
||||
<img src="doc/images/12.png" width="800">
|
||||
</div>
|
||||
|
||||
其中label目录下的标注文件为程序运行过程中产生的缓存,如果程序在中途异常终止,可以使用缓存的标注文件。
|
||||
如果程序正常运行完毕,则会在output_data下生成label.txt,为最终的标注结果。
|
||||
|
||||
<a name="应用案例"></a>
|
||||
### 四、应用案例
|
||||
|
|
|
@ -33,7 +33,7 @@ Predictor:
|
|||
- 0.5
|
||||
expand_result: false
|
||||
bg_generator:
|
||||
pretrain: models/style_text_rec/bg_generator
|
||||
pretrain: style_text_models/bg_generator
|
||||
module_name: bg_generator
|
||||
generator_type: BgGeneratorWithMask
|
||||
encode_dim: 64
|
||||
|
@ -43,7 +43,7 @@ Predictor:
|
|||
conv_block_dilation: true
|
||||
output_factor: 1.05
|
||||
text_generator:
|
||||
pretrain: models/style_text_rec/text_generator
|
||||
pretrain: style_text_models/text_generator
|
||||
module_name: text_generator
|
||||
generator_type: TextGenerator
|
||||
encode_dim: 64
|
||||
|
@ -52,7 +52,7 @@ Predictor:
|
|||
conv_block_dropout: false
|
||||
conv_block_dilation: true
|
||||
fusion_generator:
|
||||
pretrain: models/style_text_rec/fusion_generator
|
||||
pretrain: style_text_models/fusion_generator
|
||||
module_name: fusion_generator
|
||||
generator_type: FusionGeneratorSimple
|
||||
encode_dim: 64
|
||||
|
|
Binary file not shown.
After Width: | Height: | Size: 148 KiB |
|
@ -28,6 +28,7 @@ class StyleTextRecPredictor(object):
|
|||
], "Generator {} not supported.".format(algorithm)
|
||||
use_gpu = config["Global"]['use_gpu']
|
||||
check_gpu(use_gpu)
|
||||
paddle.set_device('gpu' if use_gpu else 'cpu')
|
||||
self.logger = get_logger()
|
||||
self.generator = getattr(style_text_rec, algorithm)(config)
|
||||
self.height = config["Global"]["image_height"]
|
||||
|
|
|
@ -1,2 +1,2 @@
|
|||
PaddleOCR
|
||||
Paddle
|
||||
飞桨文字识别
|
||||
|
|
|
@ -11,6 +11,14 @@
|
|||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
__dir__ = os.path.dirname(os.path.abspath(__file__))
|
||||
sys.path.append(__dir__)
|
||||
sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
|
||||
|
||||
from engine.synthesisers import DatasetSynthesiser
|
||||
|
||||
|
||||
|
|
|
@ -16,13 +16,13 @@ import cv2
|
|||
import sys
|
||||
import glob
|
||||
|
||||
from utils.config import ArgsParser
|
||||
from engine.synthesisers import ImageSynthesiser
|
||||
|
||||
__dir__ = os.path.dirname(os.path.abspath(__file__))
|
||||
sys.path.append(__dir__)
|
||||
sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
|
||||
|
||||
from utils.config import ArgsParser
|
||||
from engine.synthesisers import ImageSynthesiser
|
||||
|
||||
|
||||
def synth_image():
|
||||
args = ArgsParser().parse_args()
|
||||
|
|
|
@ -60,7 +60,8 @@ Metric:
|
|||
Train:
|
||||
dataset:
|
||||
name: SimpleDataSet
|
||||
label_file_path: [./train_data/art_latin_icdar_14pt/train_no_tt_test/train_label_json.txt, ./train_data/total_text_icdar_14pt/train_label_json.txt]
|
||||
data_dir: ./train_data/
|
||||
label_file_list: [./train_data/art_latin_icdar_14pt/train_no_tt_test/train_label_json.txt, ./train_data/total_text_icdar_14pt/train_label_json.txt]
|
||||
data_ratio_list: [0.5, 0.5]
|
||||
transforms:
|
||||
- DecodeImage: # load image
|
||||
|
|
|
@ -11,7 +11,7 @@ max_side_len 960
|
|||
det_db_thresh 0.3
|
||||
det_db_box_thresh 0.5
|
||||
det_db_unclip_ratio 2.0
|
||||
det_model_dir ./inference/ch__ppocr_mobile_v2.0_det_infer/
|
||||
det_model_dir ./inference/ch_ppocr_mobile_v2.0_det_infer/
|
||||
|
||||
# cls config
|
||||
use_angle_cls 0
|
||||
|
|
|
@ -9,9 +9,9 @@
|
|||
### 1.文本检测算法
|
||||
|
||||
PaddleOCR开源的文本检测算法列表:
|
||||
- [x] DB([paper]( https://arxiv.org/abs/1911.08947) )(ppocr推荐)
|
||||
- [x] EAST([paper](https://arxiv.org/abs/1704.03155))
|
||||
- [x] SAST([paper](https://arxiv.org/abs/1908.05498))
|
||||
- [x] DB([paper]( https://arxiv.org/abs/1911.08947)) [2](ppocr推荐)
|
||||
- [x] EAST([paper](https://arxiv.org/abs/1704.03155))[1]
|
||||
- [x] SAST([paper](https://arxiv.org/abs/1908.05498))[4]
|
||||
|
||||
在ICDAR2015文本检测公开数据集上,算法效果如下:
|
||||
|
||||
|
@ -38,13 +38,13 @@ PaddleOCR文本检测算法的训练和使用请参考文档教程中[模型训
|
|||
### 2.文本识别算法
|
||||
|
||||
PaddleOCR基于动态图开源的文本识别算法列表:
|
||||
- [x] CRNN([paper](https://arxiv.org/abs/1507.05717) )(ppocr推荐)
|
||||
- [x] Rosetta([paper](https://arxiv.org/abs/1910.05085))
|
||||
- [ ] STAR-Net([paper](http://www.bmva.org/bmvc/2016/papers/paper043/index.html)) coming soon
|
||||
- [ ] RARE([paper](https://arxiv.org/abs/1603.03915v1)) coming soon
|
||||
- [ ] SRN([paper](https://arxiv.org/abs/2003.12294)) coming soon
|
||||
- [x] CRNN([paper](https://arxiv.org/abs/1507.05717))[7](ppocr推荐)
|
||||
- [x] Rosetta([paper](https://arxiv.org/abs/1910.05085))[10]
|
||||
- [ ] STAR-Net([paper](http://www.bmva.org/bmvc/2016/papers/paper043/index.html))[11] coming soon
|
||||
- [ ] RARE([paper](https://arxiv.org/abs/1603.03915v1))[12] coming soon
|
||||
- [ ] SRN([paper](https://arxiv.org/abs/2003.12294))[5] coming soon
|
||||
|
||||
参考[DTRB](https://arxiv.org/abs/1904.01906)文字识别训练和评估流程,使用MJSynth和SynthText两个文字识别数据集训练,在IIIT, SVT, IC03, IC13, IC15, SVTP, CUTE数据集上进行评估,算法效果如下:
|
||||
参考[DTRB](https://arxiv.org/abs/1904.01906)[3]文字识别训练和评估流程,使用MJSynth和SynthText两个文字识别数据集训练,在IIIT, SVT, IC03, IC13, IC15, SVTP, CUTE数据集上进行评估,算法效果如下:
|
||||
|
||||
|模型|骨干网络|Avg Accuracy|模型存储命名|下载链接|
|
||||
|-|-|-|-|-|
|
||||
|
|
|
@ -117,7 +117,7 @@ python3 tools/eval.py -c configs/cls/cls_mv3.yml -o Global.checkpoints={path/to/
|
|||
|
||||
```
|
||||
# 预测分类结果
|
||||
python3 tools/infer_cls.py -c configs/cls/cls_mv3.yml -o Global.checkpoints={path/to/weights}/best_accuracy Global.infer_img=doc/imgs_words/ch/word_1.jpg
|
||||
python3 tools/infer_cls.py -c configs/cls/cls_mv3.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.load_static_weights=false Global.infer_img=doc/imgs_words/ch/word_1.jpg
|
||||
```
|
||||
|
||||
预测图片:
|
||||
|
|
|
@ -120,16 +120,16 @@ python3 tools/eval.py -c configs/det/det_mv3_db.yml -o Global.checkpoints="{pat
|
|||
|
||||
测试单张图像的检测效果
|
||||
```shell
|
||||
python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.checkpoints="./output/det_db/best_accuracy"
|
||||
python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.pretrained_model="./output/det_db/best_accuracy" Global.load_static_weights=false
|
||||
```
|
||||
|
||||
测试DB模型时,调整后处理阈值,
|
||||
```shell
|
||||
python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.checkpoints="./output/det_db/best_accuracy" PostProcess.box_thresh=0.6 PostProcess.unclip_ratio=1.5
|
||||
python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.pretrained_model="./output/det_db/best_accuracy" Global.load_static_weights=false PostProcess.box_thresh=0.6 PostProcess.unclip_ratio=1.5
|
||||
```
|
||||
|
||||
|
||||
测试文件夹下所有图像的检测效果
|
||||
```shell
|
||||
python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/" Global.checkpoints="./output/det_db/best_accuracy"
|
||||
python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/" Global.pretrained_model="./output/det_db/best_accuracy" Global.load_static_weights=false
|
||||
```
|
||||
|
|
|
@ -245,7 +245,10 @@ python3 tools/infer/predict_det.py --det_algorithm="SAST" --image_dir="./doc/img
|
|||
超轻量中文识别模型推理,可以执行如下命令:
|
||||
|
||||
```
|
||||
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/ch/word_4.jpg" --rec_model_dir="./inference/rec_crnn/"
|
||||
# 下载超轻量中文识别模型:
|
||||
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar
|
||||
tar xf ch_ppocr_mobile_v2.0_rec_infer.tar
|
||||
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/ch/word_4.jpg" --rec_model_dir="ch_ppocr_mobile_v2.0_rec_infer"
|
||||
```
|
||||
|
||||
![](../imgs_words/ch/word_4.jpg)
|
||||
|
@ -266,7 +269,6 @@ Predicts of ./doc/imgs_words/ch/word_4.jpg:('实力活力', 0.98458153)
|
|||
|
||||
```
|
||||
python3 tools/export_model.py -c configs/rec/rec_r34_vd_none_bilstm_ctc.yml -o Global.pretrained_model=./rec_r34_vd_none_bilstm_ctc_v2.0_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/rec_crnn
|
||||
|
||||
```
|
||||
|
||||
CRNN 文本识别模型推理,可以执行如下命令:
|
||||
|
@ -327,7 +329,10 @@ Predicts of ./doc/imgs_words/korean/1.jpg:('바탕으로', 0.9948904)
|
|||
方向分类模型推理,可以执行如下命令:
|
||||
|
||||
```
|
||||
python3 tools/infer/predict_cls.py --image_dir="./doc/imgs_words/ch/word_4.jpg" --cls_model_dir="./inference/cls/"
|
||||
# 下载超轻量中文方向分类器模型:
|
||||
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar
|
||||
tar xf ch_ppocr_mobile_v2.0_cls_infer.tar
|
||||
python3 tools/infer/predict_cls.py --image_dir="./doc/imgs_words/ch/word_4.jpg" --cls_model_dir="ch_ppocr_mobile_v2.0_cls_infer"
|
||||
```
|
||||
|
||||
![](../imgs_words/ch/word_1.jpg)
|
||||
|
|
|
@ -324,7 +324,6 @@ Eval:
|
|||
|
||||
评估数据集可以通过 `configs/rec/rec_icdar15_train.yml` 修改Eval中的 `label_file_path` 设置。
|
||||
|
||||
*注意* 评估时必须确保配置文件中 infer_img 字段为空
|
||||
```
|
||||
# GPU 评估, Global.checkpoints 为待测权重
|
||||
python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec_icdar15_train.yml -o Global.checkpoints={path/to/weights}/best_accuracy
|
||||
|
@ -342,7 +341,7 @@ python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec
|
|||
|
||||
```
|
||||
# 预测英文结果
|
||||
python3 tools/infer_rec.py -c configs/rec/rec_icdar15_train.yml -o Global.checkpoints={path/to/weights}/best_accuracy Global.infer_img=doc/imgs_words/en/word_1.png
|
||||
python3 tools/infer_rec.py -c configs/rec/rec_icdar15_train.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.load_static_weights=false Global.infer_img=doc/imgs_words/en/word_1.png
|
||||
```
|
||||
|
||||
预测图片:
|
||||
|
@ -361,7 +360,7 @@ infer_img: doc/imgs_words/en/word_1.png
|
|||
|
||||
```
|
||||
# 预测中文结果
|
||||
python3 tools/infer_rec.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.checkpoints={path/to/weights}/best_accuracy Global.infer_img=doc/imgs_words/ch/word_1.jpg
|
||||
python3 tools/infer_rec.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.load_static_weights=false Global.infer_img=doc/imgs_words/ch/word_1.jpg
|
||||
```
|
||||
|
||||
预测图片:
|
||||
|
|
|
@ -11,11 +11,12 @@
|
|||
}
|
||||
|
||||
2. DB:
|
||||
@article{liao2019real,
|
||||
title={Real-time Scene Text Detection with Differentiable Binarization},
|
||||
@inproceedings{liao2020real,
|
||||
title={Real-Time Scene Text Detection with Differentiable Binarization.},
|
||||
author={Liao, Minghui and Wan, Zhaoyi and Yao, Cong and Chen, Kai and Bai, Xiang},
|
||||
journal={arXiv preprint arXiv:1911.08947},
|
||||
year={2019}
|
||||
booktitle={AAAI},
|
||||
pages={11474--11481},
|
||||
year={2020}
|
||||
}
|
||||
|
||||
3. DTRB:
|
||||
|
@ -37,10 +38,11 @@
|
|||
}
|
||||
|
||||
5. SRN:
|
||||
@article{yu2020towards,
|
||||
title={Towards Accurate Scene Text Recognition with Semantic Reasoning Networks},
|
||||
author={Yu, Deli and Li, Xuan and Zhang, Chengquan and Han, Junyu and Liu, Jingtuo and Ding, Errui},
|
||||
journal={arXiv preprint arXiv:2003.12294},
|
||||
@inproceedings{yu2020towards,
|
||||
title={Towards accurate scene text recognition with semantic reasoning networks},
|
||||
author={Yu, Deli and Li, Xuan and Zhang, Chengquan and Liu, Tao and Han, Junyu and Liu, Jingtuo and Ding, Errui},
|
||||
booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
|
||||
pages={12113--12122},
|
||||
year={2020}
|
||||
}
|
||||
|
||||
|
@ -52,4 +54,62 @@
|
|||
pages={9086--9095},
|
||||
year={2019}
|
||||
}
|
||||
```
|
||||
|
||||
7. CRNN:
|
||||
@article{shi2016end,
|
||||
title={An end-to-end trainable neural network for image-based sequence recognition and its application to scene text recognition},
|
||||
author={Shi, Baoguang and Bai, Xiang and Yao, Cong},
|
||||
journal={IEEE transactions on pattern analysis and machine intelligence},
|
||||
volume={39},
|
||||
number={11},
|
||||
pages={2298--2304},
|
||||
year={2016},
|
||||
publisher={IEEE}
|
||||
}
|
||||
|
||||
8. FPGM:
|
||||
@inproceedings{he2019filter,
|
||||
title={Filter pruning via geometric median for deep convolutional neural networks acceleration},
|
||||
author={He, Yang and Liu, Ping and Wang, Ziwei and Hu, Zhilan and Yang, Yi},
|
||||
booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
|
||||
pages={4340--4349},
|
||||
year={2019}
|
||||
}
|
||||
|
||||
9. PACT:
|
||||
@article{choi2018pact,
|
||||
title={Pact: Parameterized clipping activation for quantized neural networks},
|
||||
author={Choi, Jungwook and Wang, Zhuo and Venkataramani, Swagath and Chuang, Pierce I-Jen and Srinivasan, Vijayalakshmi and Gopalakrishnan, Kailash},
|
||||
journal={arXiv preprint arXiv:1805.06085},
|
||||
year={2018}
|
||||
}
|
||||
|
||||
10. Rosetta:
|
||||
@inproceedings{borisyuk2018rosetta,
|
||||
title={Rosetta: Large scale system for text detection and recognition in images},
|
||||
author={Borisyuk, Fedor and Gordo, Albert and Sivakumar, Viswanath},
|
||||
booktitle={Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery \& Data Mining},
|
||||
pages={71--79},
|
||||
year={2018}
|
||||
}
|
||||
|
||||
11. STAR-Net:
|
||||
@inproceedings{liu2016star,
|
||||
title={STAR-Net: A SpaTial Attention Residue Network for Scene Text Recognition.},
|
||||
author={Liu, Wei and Chen, Chaofeng and Wong, Kwan-Yee K and Su, Zhizhong and Han, Junyu},
|
||||
booktitle={BMVC},
|
||||
volume={2},
|
||||
pages={7},
|
||||
year={2016}
|
||||
}
|
||||
|
||||
12. RARE:
|
||||
@inproceedings{shi2016robust,
|
||||
title={Robust scene text recognition with automatic rectification},
|
||||
author={Shi, Baoguang and Wang, Xinggang and Lyu, Pengyuan and Yao, Cong and Bai, Xiang},
|
||||
booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
|
||||
pages={4168--4176},
|
||||
year={2016}
|
||||
}
|
||||
|
||||
```
|
||||
|
|
|
@ -11,9 +11,9 @@ This tutorial lists the text detection algorithms and text recognition algorithm
|
|||
### 1. Text Detection Algorithm
|
||||
|
||||
PaddleOCR open source text detection algorithms list:
|
||||
- [x] EAST([paper](https://arxiv.org/abs/1704.03155))
|
||||
- [x] DB([paper](https://arxiv.org/abs/1911.08947))
|
||||
- [x] SAST([paper](https://arxiv.org/abs/1908.05498) )(Baidu Self-Research)
|
||||
- [x] EAST([paper](https://arxiv.org/abs/1704.03155))[2]
|
||||
- [x] DB([paper](https://arxiv.org/abs/1911.08947))[1]
|
||||
- [x] SAST([paper](https://arxiv.org/abs/1908.05498))[4]
|
||||
|
||||
On the ICDAR2015 dataset, the text detection result is as follows:
|
||||
|
||||
|
@ -39,11 +39,11 @@ For the training guide and use of PaddleOCR text detection algorithms, please re
|
|||
### 2. Text Recognition Algorithm
|
||||
|
||||
PaddleOCR open-source text recognition algorithms list:
|
||||
- [x] CRNN([paper](https://arxiv.org/abs/1507.05717))
|
||||
- [x] Rosetta([paper](https://arxiv.org/abs/1910.05085))
|
||||
- [ ] STAR-Net([paper](http://www.bmva.org/bmvc/2016/papers/paper043/index.html)) coming soon
|
||||
- [ ] RARE([paper](https://arxiv.org/abs/1603.03915v1)) coming soon
|
||||
- [ ] SRN([paper](https://arxiv.org/abs/2003.12294) )(Baidu Self-Research) coming soon
|
||||
- [x] CRNN([paper](https://arxiv.org/abs/1507.05717))[7]
|
||||
- [x] Rosetta([paper](https://arxiv.org/abs/1910.05085))[10]
|
||||
- [ ] STAR-Net([paper](http://www.bmva.org/bmvc/2016/papers/paper043/index.html))[11] coming soon
|
||||
- [ ] RARE([paper](https://arxiv.org/abs/1603.03915v1))[12] coming soon
|
||||
- [ ] SRN([paper](https://arxiv.org/abs/2003.12294))[5] coming soon
|
||||
|
||||
Following [DTRB](https://arxiv.org/abs/1904.01906), the training and evaluation results of the above text recognition algorithms (trained on MJSynth and SynthText, evaluated on IIIT, SVT, IC03, IC13, IC15, SVTP, CUTE) are as follows:
|
||||
|
||||
|
|
|
@ -119,7 +119,7 @@ Use `Global.infer_img` to specify the path of the predicted picture or folder, a
|
|||
|
||||
```
|
||||
# Predict English results
|
||||
python3 tools/infer_cls.py -c configs/cls/cls_mv3.yml -o Global.checkpoints={path/to/weights}/best_accuracy Global.infer_img=doc/imgs_words_en/word_10.png
|
||||
python3 tools/infer_cls.py -c configs/cls/cls_mv3.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.load_static_weights=false Global.infer_img=doc/imgs_words_en/word_10.png
|
||||
```
|
||||
|
||||
Input image:
|
||||
|
|
|
@ -113,16 +113,16 @@ python3 tools/eval.py -c configs/det/det_mv3_db.yml -o Global.checkpoints="{pat
|
|||
|
||||
Test the detection result on a single image:
|
||||
```shell
|
||||
python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.checkpoints="./output/det_db/best_accuracy"
|
||||
python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.pretrained_model="./output/det_db/best_accuracy" Global.load_static_weights=false
|
||||
```
|
||||
|
||||
When testing the DB model, adjust the post-processing threshold:
|
||||
```shell
|
||||
python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.checkpoints="./output/det_db/best_accuracy" PostProcess.box_thresh=0.6 PostProcess.unclip_ratio=1.5
|
||||
python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.pretrained_model="./output/det_db/best_accuracy" Global.load_static_weights=false PostProcess.box_thresh=0.6 PostProcess.unclip_ratio=1.5
|
||||
```
|
||||
|
||||
|
||||
Test the detection result on all images in the folder:
|
||||
```shell
|
||||
python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/" Global.checkpoints="./output/det_db/best_accuracy"
|
||||
python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/" Global.pretrained_model="./output/det_db/best_accuracy" Global.load_static_weights=false
|
||||
```
|
||||
|
|
|
@ -255,15 +255,18 @@ The following will introduce the lightweight Chinese recognition model inference
|
|||
For lightweight Chinese recognition model inference, you can execute the following commands:
|
||||
|
||||
```
|
||||
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/ch/word_4.jpg" --rec_model_dir="./inference/rec_crnn/"
|
||||
# download CRNN text recognition inference model
|
||||
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar
|
||||
tar xf ch_ppocr_mobile_v2.0_rec_infer.tar
|
||||
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_10.png" --rec_model_dir="ch_ppocr_mobile_v2.0_rec_infer"
|
||||
```
|
||||
|
||||
![](../imgs_words/ch/word_4.jpg)
|
||||
![](../imgs_words_en/word_10.png)
|
||||
|
||||
After executing the command, the prediction results (recognized text and score) of the above image will be printed on the screen.
|
||||
|
||||
```bash
|
||||
Predicts of ./doc/imgs_words/ch/word_4.jpg:('实力活力', 0.98458153)
|
||||
Predicts of ./doc/imgs_words_en/word_10.png:('PAIN', 0.9897658)
|
||||
```
|
||||
|
||||
<a name="CTC-BASED_RECOGNITION"></a>
|
||||
|
@ -339,7 +342,12 @@ For angle classification model inference, you can execute the following commands
|
|||
```
|
||||
python3 tools/infer/predict_cls.py --image_dir="./doc/imgs_words_en/word_10.png" --cls_model_dir="./inference/cls/"
|
||||
```
|
||||
|
||||
```
|
||||
# download the text angle classification inference model:
|
||||
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar
|
||||
tar xf ch_ppocr_mobile_v2.0_cls_infer.tar
|
||||
python3 tools/infer/predict_cls.py --image_dir="./doc/imgs_words_en/word_10.png" --cls_model_dir="ch_ppocr_mobile_v2.0_cls_infer"
|
||||
```
|
||||
![](../imgs_words_en/word_10.png)
|
||||
|
||||
After executing the command, the prediction results (classification angle and score) of the above image will be printed on the screen.
|
||||
|
|
|
@ -317,11 +317,11 @@ Eval:
|
|||
<a name="EVALUATION"></a>
|
||||
### EVALUATION
|
||||
|
||||
The evaluation data set can be modified via `configs/rec/rec_icdar15_reader.yml` setting of `label_file_path` in EvalReader.
|
||||
The evaluation dataset can be set by modifying the `Eval.dataset.label_file_list` field in the `configs/rec/rec_icdar15_train.yml` file.
|
||||
|
||||
```
|
||||
# GPU evaluation, Global.checkpoints is the weight to be tested
|
||||
python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec_icdar15_reader.yml -o Global.checkpoints={path/to/weights}/best_accuracy
|
||||
python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec_icdar15_train.yml -o Global.checkpoints={path/to/weights}/best_accuracy
|
||||
```
|
||||
|
||||
<a name="PREDICTION"></a>
|
||||
|
@ -336,7 +336,7 @@ The default prediction picture is stored in `infer_img`, and the weight is speci
|
|||
|
||||
```
|
||||
# Predict English results
|
||||
python3 tools/infer_rec.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.checkpoints={path/to/weights}/best_accuracy TestReader.infer_img=doc/imgs_words/en/word_1.jpg
|
||||
python3 tools/infer_rec.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.load_static_weights=false Global.infer_img=doc/imgs_words/en/word_1.jpg
|
||||
```
|
||||
|
||||
Input image:
|
||||
|
@ -354,7 +354,7 @@ The configuration file used for prediction must be consistent with the training.
|
|||
|
||||
```
|
||||
# Predict Chinese results
|
||||
python3 tools/infer_rec.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.checkpoints={path/to/weights}/best_accuracy TestReader.infer_img=doc/imgs_words/ch/word_1.jpg
|
||||
python3 tools/infer_rec.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.load_static_weights=false Global.infer_img=doc/imgs_words/ch/word_1.jpg
|
||||
```
|
||||
|
||||
Input image:
|
||||
|
|
BIN
doc/joinus.PNG
BIN
doc/joinus.PNG
Binary file not shown.
Before Width: | Height: | Size: 272 KiB After Width: | Height: | Size: 212 KiB |
|
@ -262,8 +262,8 @@ class PaddleOCR(predict_system.TextSystem):
|
|||
logger.error('rec_algorithm must in {}'.format(SUPPORT_REC_MODEL))
|
||||
sys.exit(0)
|
||||
|
||||
postprocess_params.rec_char_dict_path = Path(
|
||||
__file__).parent / postprocess_params.rec_char_dict_path
|
||||
postprocess_params.rec_char_dict_path = str(
|
||||
Path(__file__).parent / postprocess_params.rec_char_dict_path)
|
||||
|
||||
# init det_model and rec_model
|
||||
super().__init__(postprocess_params)
|
||||
|
|
|
@ -45,7 +45,6 @@ class BalanceLoss(nn.Layer):
|
|||
self.balance_loss = balance_loss
|
||||
self.main_loss_type = main_loss_type
|
||||
self.negative_ratio = negative_ratio
|
||||
self.main_loss_type = main_loss_type
|
||||
self.return_origin = return_origin
|
||||
self.eps = eps
|
||||
|
||||
|
|
|
@ -102,7 +102,6 @@ def init_model(config, model, logger, optimizer=None, lr_scheduler=None):
|
|||
best_model_dict = states_dict.get('best_model_dict', {})
|
||||
if 'epoch' in states_dict:
|
||||
best_model_dict['start_epoch'] = states_dict['epoch'] + 1
|
||||
best_model_dict['start_epoch'] = best_model_dict['best_epoch'] + 1
|
||||
|
||||
logger.info("resume from {}".format(checkpoints))
|
||||
elif pretrained_model:
|
||||
|
|
2
setup.py
2
setup.py
|
@ -32,7 +32,7 @@ setup(
|
|||
package_dir={'paddleocr': ''},
|
||||
include_package_data=True,
|
||||
entry_points={"console_scripts": ["paddleocr= paddleocr.paddleocr:main"]},
|
||||
version='2.0.1',
|
||||
version='2.0.2',
|
||||
install_requires=requirements,
|
||||
license='Apache License 2.0',
|
||||
description='Awesome OCR toolkits based on PaddlePaddle (8.6M ultra-lightweight pre-trained model, support training and deployment among server, mobile, embeded and IoT devices',
|
||||
|
|
|
@ -71,6 +71,9 @@ class TextDetector(object):
|
|||
postprocess_params["cover_thresh"] = args.det_east_cover_thresh
|
||||
postprocess_params["nms_thresh"] = args.det_east_nms_thresh
|
||||
elif self.det_algorithm == "SAST":
|
||||
pre_process_list[0] = {
|
||||
'DetResizeForTest': {'resize_long': args.det_limit_side_len}
|
||||
}
|
||||
postprocess_params['name'] = 'SASTPostProcess'
|
||||
postprocess_params["score_thresh"] = args.det_sast_score_thresh
|
||||
postprocess_params["nms_thresh"] = args.det_sast_nms_thresh
|
||||
|
|
|
@ -34,7 +34,6 @@ def parse_args():
|
|||
parser.add_argument("--ir_optim", type=str2bool, default=True)
|
||||
parser.add_argument("--use_tensorrt", type=str2bool, default=False)
|
||||
parser.add_argument("--use_fp16", type=str2bool, default=False)
|
||||
parser.add_argument("--max_batch_size", type=int, default=10)
|
||||
parser.add_argument("--gpu_mem", type=int, default=8000)
|
||||
|
||||
# params for text detector
|
||||
|
|
|
@ -332,7 +332,7 @@ def eval(model, valid_dataloader, post_process_class, eval_class):
|
|||
return metirc
|
||||
|
||||
|
||||
def preprocess():
|
||||
def preprocess(is_train=False):
|
||||
FLAGS = ArgsParser().parse_args()
|
||||
config = load_config(FLAGS.config)
|
||||
merge_config(FLAGS.opt)
|
||||
|
@ -350,15 +350,17 @@ def preprocess():
|
|||
device = paddle.set_device(device)
|
||||
|
||||
config['Global']['distributed'] = dist.get_world_size() != 1
|
||||
|
||||
# save_config
|
||||
save_model_dir = config['Global']['save_model_dir']
|
||||
os.makedirs(save_model_dir, exist_ok=True)
|
||||
with open(os.path.join(save_model_dir, 'config.yml'), 'w') as f:
|
||||
yaml.dump(dict(config), f, default_flow_style=False, sort_keys=False)
|
||||
|
||||
logger = get_logger(
|
||||
name='root', log_file='{}/train.log'.format(save_model_dir))
|
||||
if is_train:
|
||||
# save_config
|
||||
save_model_dir = config['Global']['save_model_dir']
|
||||
os.makedirs(save_model_dir, exist_ok=True)
|
||||
with open(os.path.join(save_model_dir, 'config.yml'), 'w') as f:
|
||||
yaml.dump(
|
||||
dict(config), f, default_flow_style=False, sort_keys=False)
|
||||
log_file = '{}/train.log'.format(save_model_dir)
|
||||
else:
|
||||
log_file = None
|
||||
logger = get_logger(name='root', log_file=log_file)
|
||||
if config['Global']['use_visualdl']:
|
||||
from visualdl import LogWriter
|
||||
vdl_writer_path = '{}/vdl/'.format(save_model_dir)
|
||||
|
|
|
@ -110,6 +110,6 @@ def test_reader(config, device, logger):
|
|||
|
||||
|
||||
if __name__ == '__main__':
|
||||
config, device, logger, vdl_writer = program.preprocess()
|
||||
config, device, logger, vdl_writer = program.preprocess(is_train=True)
|
||||
main(config, device, logger, vdl_writer)
|
||||
# test_reader(config, device, logger)
|
||||
|
|
Loading…
Reference in New Issue