266 lines
8.1 KiB
Go
266 lines
8.1 KiB
Go
package ocr
|
|
|
|
import (
|
|
"image"
|
|
"image/color"
|
|
"math"
|
|
"sort"
|
|
|
|
"github.com/LKKlein/gocv"
|
|
"github.com/PaddlePaddle/PaddleOCR/thirdparty/paddleocr-go/paddle"
|
|
clipper "github.com/ctessum/go.clipper"
|
|
)
|
|
|
|
// xFloatSortBy adapts a list of float32 points to sort.Interface. Points are
// ordered by ascending first component (index 0 of each inner slice); the
// remaining components do not take part in the comparison.
type xFloatSortBy [][]float32

// Len reports how many points the collection holds.
func (pts xFloatSortBy) Len() int {
	return len(pts)
}

// Swap exchanges the points stored at positions i and j.
func (pts xFloatSortBy) Swap(i, j int) {
	pi, pj := pts[i], pts[j]
	pts[i], pts[j] = pj, pi
}

// Less reports whether the first component of point i is strictly smaller
// than the first component of point j.
func (pts xFloatSortBy) Less(i, j int) bool {
	lhs, rhs := pts[i][0], pts[j][0]
	return lhs < rhs
}
|
|
|
|
// xIntSortBy adapts a list of integer points to sort.Interface. Points are
// ordered by ascending first component (index 0 of each inner slice); the
// remaining components do not take part in the comparison.
type xIntSortBy [][]int

// Len reports how many points the collection holds.
func (pts xIntSortBy) Len() int {
	return len(pts)
}

// Swap exchanges the points stored at positions i and j.
func (pts xIntSortBy) Swap(i, j int) {
	pi, pj := pts[i], pts[j]
	pts[i], pts[j] = pj, pi
}

// Less reports whether the first component of point i is strictly smaller
// than the first component of point j.
func (pts xIntSortBy) Less(i, j int) bool {
	lhs, rhs := pts[i][0], pts[j][0]
	return lhs < rhs
}
|
|
|
|
type DetPostProcess interface {
|
|
Run(output *paddle.ZeroCopyTensor, oriH, oriW int, ratioH, ratioW float64) [][][]int
|
|
}
|
|
|
|
// DBPostProcess carries the tunable parameters used by the detection
// post-processing methods defined on it.
type DBPostProcess struct {
	// thresh is the per-pixel cut-off Run uses to binarize the prediction map.
	thresh float64
	// boxThresh is the minimum boxScoreFast score a candidate box must reach.
	boxThresh float64
	// maxCandidates caps the number of contours examined per call.
	maxCandidates int
	// unClipRatio scales the expansion distance computed in unClip.
	unClipRatio float64
	// minSize is the minimum side length a candidate rectangle must have.
	minSize int
}

// NewDBPostProcess builds a DBPostProcess from the three caller-supplied
// parameters. maxCandidates and minSize are not configurable here and are
// fixed to 1000 and 3 respectively.
func NewDBPostProcess(thresh, boxThresh, unClipRatio float64) *DBPostProcess {
	p := new(DBPostProcess)
	p.thresh = thresh
	p.boxThresh = boxThresh
	p.unClipRatio = unClipRatio
	p.maxCandidates = 1000
	p.minSize = 3
	return p
}
|
|
|
|
func (d *DBPostProcess) getMinBoxes(rect gocv.RotatedRect) [][]float32 {
|
|
points := gocv.NewMat()
|
|
gocv.BoxPoints(rect, &points)
|
|
defer points.Close()
|
|
array := d.mat2slice(points)
|
|
sort.Sort(xFloatSortBy(array))
|
|
|
|
point1, point2, point3, point4 := array[0], array[1], array[2], array[3]
|
|
if array[3][1] <= array[2][1] {
|
|
point2, point3 = array[3], array[2]
|
|
} else {
|
|
point2, point3 = array[2], array[3]
|
|
}
|
|
|
|
if array[1][1] <= array[0][1] {
|
|
point1, point4 = array[1], array[0]
|
|
} else {
|
|
point1, point4 = array[0], array[1]
|
|
}
|
|
|
|
array = [][]float32{point1, point2, point3, point4}
|
|
return array
|
|
}
|
|
|
|
func (d *DBPostProcess) mat2slice(mat gocv.Mat) [][]float32 {
|
|
array := make([][]float32, mat.Rows())
|
|
for i := 0; i < mat.Rows(); i++ {
|
|
tmp := make([]float32, mat.Cols())
|
|
for j := 0; j < mat.Cols(); j++ {
|
|
tmp[j] = mat.GetFloatAt(i, j)
|
|
}
|
|
array[i] = tmp
|
|
}
|
|
return array
|
|
}
|
|
|
|
func (d *DBPostProcess) boxScoreFast(array [][]float32, pred gocv.Mat) float64 {
|
|
height, width := pred.Rows(), pred.Cols()
|
|
boxX := []float32{array[0][0], array[1][0], array[2][0], array[3][0]}
|
|
boxY := []float32{array[0][1], array[1][1], array[2][1], array[3][1]}
|
|
|
|
xmin := clip(int(math.Floor(float64(minf(boxX)))), 0, width-1)
|
|
xmax := clip(int(math.Ceil(float64(maxf(boxX)))), 0, width-1)
|
|
ymin := clip(int(math.Floor(float64(minf(boxY)))), 0, height-1)
|
|
ymax := clip(int(math.Ceil(float64(maxf(boxY)))), 0, height-1)
|
|
|
|
mask := gocv.NewMatWithSize(ymax-ymin+1, xmax-xmin+1, gocv.MatTypeCV8UC1)
|
|
defer mask.Close()
|
|
ppt := make([][]image.Point, 1)
|
|
ppt[0] = make([]image.Point, 4)
|
|
ppt[0][0] = image.Point{int(array[0][0]) - xmin, int(array[0][1]) - ymin}
|
|
ppt[0][1] = image.Point{int(array[1][0]) - xmin, int(array[1][1]) - ymin}
|
|
ppt[0][2] = image.Point{int(array[2][0]) - xmin, int(array[2][1]) - ymin}
|
|
ppt[0][3] = image.Point{int(array[3][0]) - xmin, int(array[3][1]) - ymin}
|
|
gocv.FillPoly(&mask, ppt, color.RGBA{0, 0, 1, 0})
|
|
croppedImg := pred.Region(image.Rect(xmin, ymin, xmax+1, ymax+1))
|
|
s := croppedImg.MeanWithMask(mask)
|
|
return s.Val1
|
|
}
|
|
|
|
func (d *DBPostProcess) unClip(box [][]float32) gocv.RotatedRect {
|
|
var area, dist float64
|
|
for i := 0; i < 4; i++ {
|
|
area += float64(box[i][0]*box[(i+1)%4][1] - box[i][1]*box[(i+1)%4][0])
|
|
dist += math.Sqrt(float64(
|
|
(box[i][0]-box[(i+1)%4][0])*(box[i][0]-box[(i+1)%4][0]) +
|
|
(box[i][1]-box[(i+1)%4][1])*(box[i][1]-box[(i+1)%4][1]),
|
|
))
|
|
}
|
|
area = math.Abs(area / 2.0)
|
|
distance := area * d.unClipRatio / dist
|
|
offset := clipper.NewClipperOffset()
|
|
path := make([]*clipper.IntPoint, 4)
|
|
path[0] = &clipper.IntPoint{X: clipper.CInt(box[0][0]), Y: clipper.CInt(box[0][1])}
|
|
path[1] = &clipper.IntPoint{X: clipper.CInt(box[1][0]), Y: clipper.CInt(box[1][1])}
|
|
path[2] = &clipper.IntPoint{X: clipper.CInt(box[2][0]), Y: clipper.CInt(box[2][1])}
|
|
path[3] = &clipper.IntPoint{X: clipper.CInt(box[3][0]), Y: clipper.CInt(box[3][1])}
|
|
offset.AddPath(clipper.Path(path), clipper.JtRound, clipper.EtClosedPolygon)
|
|
soln := offset.Execute(distance)
|
|
|
|
points := make([]image.Point, 0, 4)
|
|
for i := 0; i < len(soln); i++ {
|
|
for j := 0; j < len(soln[i]); j++ {
|
|
points = append(points, image.Point{int(soln[i][j].X), int(soln[i][j].Y)})
|
|
}
|
|
}
|
|
|
|
var res gocv.RotatedRect
|
|
if len(points) <= 0 {
|
|
points = make([]image.Point, 4)
|
|
points[0] = image.Pt(0, 0)
|
|
points[1] = image.Pt(1, 0)
|
|
points[2] = image.Pt(1, 1)
|
|
points[3] = image.Pt(0, 1)
|
|
res = gocv.RotatedRect{
|
|
Contour: points,
|
|
BoundingRect: image.Rect(0, 0, 1, 1),
|
|
Center: gocv.Point2f{X: 0.5, Y: 0.5},
|
|
Width: 1,
|
|
Height: 1,
|
|
Angle: 0,
|
|
}
|
|
} else {
|
|
res = gocv.MinAreaRect(points)
|
|
}
|
|
return res
|
|
}
|
|
|
|
func (d *DBPostProcess) boxesFromBitmap(pred gocv.Mat, mask gocv.Mat, ratioH float64, ratioW float64) [][][]int {
|
|
height, width := mask.Rows(), mask.Cols()
|
|
mask.MultiplyUChar(255)
|
|
contours := gocv.FindContours(mask, gocv.RetrievalList, gocv.ChainApproxSimple)
|
|
numContours := len(contours)
|
|
if numContours > d.maxCandidates {
|
|
numContours = d.maxCandidates
|
|
}
|
|
|
|
boxes := make([][][]int, 0, numContours)
|
|
for i := 0; i < numContours; i++ {
|
|
contour := contours[i]
|
|
boundingbox := gocv.MinAreaRect(contour)
|
|
if boundingbox.Width < float32(d.minSize) || boundingbox.Height < float32(d.minSize) {
|
|
continue
|
|
}
|
|
points := d.getMinBoxes(boundingbox)
|
|
score := d.boxScoreFast(points, pred)
|
|
if score < d.boxThresh {
|
|
continue
|
|
}
|
|
|
|
box := d.unClip(points)
|
|
if box.Width < float32(d.minSize+2) || box.Height < float32(d.minSize+2) {
|
|
continue
|
|
}
|
|
|
|
cliparray := d.getMinBoxes(box)
|
|
dstHeight, dstWidth := pred.Rows(), pred.Cols()
|
|
intcliparray := make([][]int, 4)
|
|
for i := 0; i < 4; i++ {
|
|
p := []int{
|
|
int(float64(clip(int(math.Round(
|
|
float64(cliparray[i][0]/float32(width)*float32(dstWidth)))), 0, dstWidth)) / ratioW),
|
|
int(float64(clip(int(math.Round(
|
|
float64(cliparray[i][1]/float32(height)*float32(dstHeight)))), 0, dstHeight)) / ratioH),
|
|
}
|
|
intcliparray[i] = p
|
|
}
|
|
boxes = append(boxes, intcliparray)
|
|
}
|
|
return boxes
|
|
}
|
|
|
|
func (d *DBPostProcess) orderPointsClockwise(box [][]int) [][]int {
|
|
sort.Sort(xIntSortBy(box))
|
|
leftmost := [][]int{box[0], box[1]}
|
|
rightmost := [][]int{box[2], box[3]}
|
|
|
|
if leftmost[0][1] > leftmost[1][1] {
|
|
leftmost[0], leftmost[1] = leftmost[1], leftmost[0]
|
|
}
|
|
|
|
if rightmost[0][1] > rightmost[1][1] {
|
|
rightmost[0], rightmost[1] = rightmost[1], rightmost[0]
|
|
}
|
|
|
|
return [][]int{leftmost[0], rightmost[0], rightmost[1], leftmost[1]}
|
|
}
|
|
|
|
func (d *DBPostProcess) filterTagDetRes(boxes [][][]int, oriH, oriW int) [][][]int {
|
|
points := make([][][]int, 0, len(boxes))
|
|
for i := 0; i < len(boxes); i++ {
|
|
boxes[i] = d.orderPointsClockwise(boxes[i])
|
|
for j := 0; j < len(boxes[i]); j++ {
|
|
boxes[i][j][0] = clip(boxes[i][j][0], 0, oriW-1)
|
|
boxes[i][j][1] = clip(boxes[i][j][1], 0, oriH-1)
|
|
}
|
|
}
|
|
|
|
for i := 0; i < len(boxes); i++ {
|
|
rectW := int(math.Sqrt(math.Pow(float64(boxes[i][0][0]-boxes[i][1][0]), 2.0) +
|
|
math.Pow(float64(boxes[i][0][1]-boxes[i][1][1]), 2.0)))
|
|
rectH := int(math.Sqrt(math.Pow(float64(boxes[i][0][0]-boxes[i][3][0]), 2.0) +
|
|
math.Pow(float64(boxes[i][0][1]-boxes[i][3][1]), 2.0)))
|
|
if rectW <= 4 || rectH <= 4 {
|
|
continue
|
|
}
|
|
points = append(points, boxes[i])
|
|
}
|
|
return points
|
|
}
|
|
|
|
func (d *DBPostProcess) Run(output *paddle.ZeroCopyTensor, oriH, oriW int, ratioH, ratioW float64) [][][]int {
|
|
v := output.Value().([][][][]float32)
|
|
|
|
shape := output.Shape()
|
|
height, width := int(shape[2]), int(shape[3])
|
|
|
|
pred := gocv.NewMatWithSize(height, width, gocv.MatTypeCV32F)
|
|
bitmap := gocv.NewMatWithSize(height, width, gocv.MatTypeCV8UC1)
|
|
thresh := float32(d.thresh)
|
|
for i := 0; i < height; i++ {
|
|
for j := 0; j < width; j++ {
|
|
pred.SetFloatAt(i, j, v[0][0][i][j])
|
|
if v[0][0][i][j] > thresh {
|
|
bitmap.SetUCharAt(i, j, 1)
|
|
} else {
|
|
bitmap.SetUCharAt(i, j, 0)
|
|
}
|
|
}
|
|
}
|
|
|
|
mask := gocv.NewMat()
|
|
kernel := gocv.GetStructuringElement(gocv.MorphRect, image.Point{2, 2})
|
|
gocv.Dilate(bitmap, &mask, kernel)
|
|
boxes := d.boxesFromBitmap(pred, mask, ratioH, ratioW)
|
|
dtboxes := d.filterTagDetRes(boxes, oriH, oriW)
|
|
return dtboxes
|
|
}
|