PaddleOCR/thirdparty/paddleocr-go/ocr/postprocess.go

266 lines
8.1 KiB
Go

package ocr
import (
"image"
"image/color"
"math"
"sort"
"github.com/LKKlein/gocv"
"github.com/PaddlePaddle/PaddleOCR/thirdparty/paddleocr-go/paddle"
clipper "github.com/ctessum/go.clipper"
)
// xFloatSortBy adapts a list of float32 rows to sort.Interface, ordering the
// rows by their first element. getMinBoxes relies on it to arrange the four
// box corners by their first coordinate.
type xFloatSortBy [][]float32

// Len reports how many rows are held.
func (rows xFloatSortBy) Len() int {
	return len(rows)
}

// Swap exchanges the rows stored at positions i and j.
func (rows xFloatSortBy) Swap(i, j int) {
	tmp := rows[i]
	rows[i] = rows[j]
	rows[j] = tmp
}

// Less reports whether the first element of row i is strictly smaller than
// the first element of row j.
func (rows xFloatSortBy) Less(i, j int) bool {
	first, second := rows[i][0], rows[j][0]
	return first < second
}
// xIntSortBy adapts a list of int rows to sort.Interface, ordering the rows
// by their first element. orderPointsClockwise relies on it to arrange box
// corners by their first coordinate.
type xIntSortBy [][]int

// Len reports how many rows are held.
func (rows xIntSortBy) Len() int {
	return len(rows)
}

// Swap exchanges the rows stored at positions i and j.
func (rows xIntSortBy) Swap(i, j int) {
	tmp := rows[i]
	rows[i] = rows[j]
	rows[j] = tmp
}

// Less reports whether the first element of row i is strictly smaller than
// the first element of row j.
func (rows xIntSortBy) Less(i, j int) bool {
	first, second := rows[i][0], rows[j][0]
	return first < second
}
// DetPostProcess is the contract for turning a detection model's output
// tensor into integer boxes. DBPostProcess in this file provides a Run method
// with this signature.
type DetPostProcess interface {
	// Run receives the output tensor together with two integer sizes and two
	// float ratios and returns a list of boxes, each box being a list of
	// integer points. How the sizes and ratios are applied is up to the
	// implementation: DBPostProcess clamps coordinates to oriW-1 / oriH-1 and
	// divides them by ratioW / ratioH.
	Run(output *paddle.ZeroCopyTensor, oriH int, oriW int, ratioH float64, ratioW float64) [][][]int
}
// DBPostProcess holds the tunables used when converting a score map into
// text boxes.
type DBPostProcess struct {
	// thresh: scores strictly above it are marked in the binary map (see Run).
	// boxThresh: candidates whose mean score is below it are discarded.
	thresh, boxThresh float64
	// maxCandidates caps how many contours boxesFromBitmap examines.
	maxCandidates int
	// unClipRatio scales the distance by which unClip grows a box.
	unClipRatio float64
	// minSize is the smallest accepted box side; after unClip the limit
	// becomes minSize+2.
	minSize int
}

// NewDBPostProcess returns a DBPostProcess configured with the supplied
// thresholds and un-clip ratio. maxCandidates is fixed at 1000 and minSize
// at 3.
func NewDBPostProcess(thresh, boxThresh, unClipRatio float64) *DBPostProcess {
	const (
		candidateLimit = 1000
		smallestSide   = 3
	)

	p := new(DBPostProcess)
	p.thresh = thresh
	p.boxThresh = boxThresh
	p.unClipRatio = unClipRatio
	p.maxCandidates = candidateLimit
	p.minSize = smallestSide
	return p
}
// getMinBoxes converts a rotated rectangle into its four corner points in a
// fixed order: the left pair's smaller-y corner, the right pair's smaller-y
// corner, the right pair's larger-y corner, then the left pair's larger-y
// corner. "Left" and "right" are decided by sorting the corners on their
// first coordinate.
func (d *DBPostProcess) getMinBoxes(rect gocv.RotatedRect) [][]float32 {
	cornerMat := gocv.NewMat()
	defer cornerMat.Close()
	gocv.BoxPoints(rect, &cornerMat)

	corners := d.mat2slice(cornerMat)
	sort.Sort(xFloatSortBy(corners))

	// After sorting, entries 0 and 1 have the two smallest first coordinates
	// and entries 2 and 3 the two largest. Within each pair, ties on the
	// second coordinate put the later entry first.
	leftMinY, leftMaxY := corners[0], corners[1]
	if corners[1][1] <= corners[0][1] {
		leftMinY, leftMaxY = corners[1], corners[0]
	}

	rightMinY, rightMaxY := corners[2], corners[3]
	if corners[3][1] <= corners[2][1] {
		rightMinY, rightMaxY = corners[3], corners[2]
	}

	return [][]float32{leftMinY, rightMinY, rightMaxY, leftMaxY}
}
// mat2slice copies every element of mat, read with GetFloatAt, into a
// rows-by-cols slice of float32 values.
func (d *DBPostProcess) mat2slice(mat gocv.Mat) [][]float32 {
	rowCount, colCount := mat.Rows(), mat.Cols()

	out := make([][]float32, 0, rowCount)
	for r := 0; r < rowCount; r++ {
		line := make([]float32, colCount)
		for c := range line {
			line[c] = mat.GetFloatAt(r, c)
		}
		out = append(out, line)
	}
	return out
}
// boxScoreFast returns the mean of pred over the quadrilateral described by
// the four points in array.
//
// The quadrilateral's bounding rectangle is clamped to pred's extent, a mask
// of that size is filled with the polygon (shifted so the rectangle's corner
// is the origin), and the mean of the matching region of pred is taken under
// that mask. Only the first component of the resulting scalar is returned.
//
// array must hold at least four points with at least two coordinates each.
func (d *DBPostProcess) boxScoreFast(array [][]float32, pred gocv.Mat) float64 {
	height, width := pred.Rows(), pred.Cols()

	boxX := []float32{array[0][0], array[1][0], array[2][0], array[3][0]}
	boxY := []float32{array[0][1], array[1][1], array[2][1], array[3][1]}

	// Integer bounding rectangle of the box, kept inside the score map.
	xmin := clip(int(math.Floor(float64(minf(boxX)))), 0, width-1)
	xmax := clip(int(math.Ceil(float64(maxf(boxX)))), 0, width-1)
	ymin := clip(int(math.Floor(float64(minf(boxY)))), 0, height-1)
	ymax := clip(int(math.Ceil(float64(maxf(boxY)))), 0, height-1)

	mask := gocv.NewMatWithSize(ymax-ymin+1, xmax-xmin+1, gocv.MatTypeCV8UC1)
	defer mask.Close()

	// Polygon vertices relative to the bounding rectangle's corner.
	polygon := make([]image.Point, 4)
	for i := range polygon {
		polygon[i] = image.Point{X: int(array[i][0]) - xmin, Y: int(array[i][1]) - ymin}
	}
	// NOTE(review): presumably this colour writes the value 1 into the
	// single-channel mask; confirm against gocv's colour-to-scalar mapping.
	gocv.FillPoly(&mask, [][]image.Point{polygon}, color.RGBA{0, 0, 1, 0})

	croppedImg := pred.Region(image.Rect(xmin, ymin, xmax+1, ymax+1))
	// Region hands back its own Mat; release it so one is not leaked for
	// every candidate box that gets scored.
	defer croppedImg.Close()

	s := croppedImg.MeanWithMask(mask)
	return s.Val1
}
// unClip grows the quadrilateral box outward and returns the minimum-area
// rotated rectangle around the grown outline.
//
// The growth distance is area * unClipRatio / perimeter, where the area is
// obtained from the shoelace sum over the four vertices. The outline is
// offset with round joins. When the offset yields no points at all, a unit
// square at the origin is returned instead.
func (d *DBPostProcess) unClip(box [][]float32) gocv.RotatedRect {
	var twiceArea, perimeter float64
	for i := 0; i < 4; i++ {
		cur, next := box[i], box[(i+1)%4]
		twiceArea += float64(cur[0]*next[1] - cur[1]*next[0])
		perimeter += math.Sqrt(float64(
			(cur[0]-next[0])*(cur[0]-next[0]) +
				(cur[1]-next[1])*(cur[1]-next[1]),
		))
	}
	distance := math.Abs(twiceArea/2.0) * d.unClipRatio / perimeter

	// Coordinates are converted to the clipper's integer type.
	outline := make([]*clipper.IntPoint, 4)
	for i := range outline {
		outline[i] = &clipper.IntPoint{X: clipper.CInt(box[i][0]), Y: clipper.CInt(box[i][1])}
	}

	offsetter := clipper.NewClipperOffset()
	offsetter.AddPath(clipper.Path(outline), clipper.JtRound, clipper.EtClosedPolygon)
	solution := offsetter.Execute(distance)

	// Flatten every returned polygon into one point list.
	grown := make([]image.Point, 0, 4)
	for _, polygon := range solution {
		for _, vertex := range polygon {
			grown = append(grown, image.Point{int(vertex.X), int(vertex.Y)})
		}
	}

	if len(grown) > 0 {
		return gocv.MinAreaRect(grown)
	}

	// Nothing came back from the offset: fall back to a 1x1 rectangle.
	return gocv.RotatedRect{
		Contour: []image.Point{
			image.Pt(0, 0),
			image.Pt(1, 0),
			image.Pt(1, 1),
			image.Pt(0, 1),
		},
		BoundingRect: image.Rect(0, 0, 1, 1),
		Center:       gocv.Point2f{X: 0.5, Y: 0.5},
		Width:        1,
		Height:       1,
		Angle:        0,
	}
}
// boxesFromBitmap extracts candidate boxes from the contours of mask and
// scores them against pred.
//
// mask is multiplied by 255 in place before contours are searched. At most
// maxCandidates contours are examined. A contour is dropped when its
// minimum-area rectangle has a side below minSize, when its mean score is
// below boxThresh, or when the un-clipped rectangle has a side below
// minSize+2. Surviving corners are rescaled from mask's size to pred's size,
// rounded, clamped to [0, size], divided by ratioW / ratioH and truncated to
// int. Each returned box holds four [x, y] points.
func (d *DBPostProcess) boxesFromBitmap(pred gocv.Mat, mask gocv.Mat, ratioH float64, ratioW float64) [][][]int {
	srcH, srcW := mask.Rows(), mask.Cols()
	dstH, dstW := pred.Rows(), pred.Cols()

	mask.MultiplyUChar(255)
	contours := gocv.FindContours(mask, gocv.RetrievalList, gocv.ChainApproxSimple)

	limit := len(contours)
	if limit > d.maxCandidates {
		limit = d.maxCandidates
	}

	// rescale maps one coordinate from the mask's extent to the prediction's
	// extent and then applies the resize ratio.
	rescale := func(v float32, src, dst int, ratio float64) int {
		scaled := int(math.Round(float64(v / float32(src) * float32(dst))))
		return int(float64(clip(scaled, 0, dst)) / ratio)
	}

	minSide := float32(d.minSize)
	minGrownSide := float32(d.minSize + 2)

	result := make([][][]int, 0, limit)
	for n := 0; n < limit; n++ {
		rect := gocv.MinAreaRect(contours[n])
		if rect.Width < minSide || rect.Height < minSide {
			continue
		}

		corners := d.getMinBoxes(rect)
		if d.boxScoreFast(corners, pred) < d.boxThresh {
			continue
		}

		grown := d.unClip(corners)
		if grown.Width < minGrownSide || grown.Height < minGrownSide {
			continue
		}

		grownCorners := d.getMinBoxes(grown)
		quad := make([][]int, 0, 4)
		for k := 0; k < 4; k++ {
			quad = append(quad, []int{
				rescale(grownCorners[k][0], srcW, dstW, ratioW),
				rescale(grownCorners[k][1], srcH, dstH, ratioH),
			})
		}
		result = append(result, quad)
	}
	return result
}
// orderPointsClockwise arranges the four points of box as: the left pair's
// smaller-y point, the right pair's smaller-y point, the right pair's
// larger-y point, the left pair's larger-y point. The left pair is the two
// points with the smallest first coordinate.
//
// box itself is sorted in place by first coordinate as a side effect; the
// returned slice is newly allocated but shares the point slices with box.
func (d *DBPostProcess) orderPointsClockwise(box [][]int) [][]int {
	sort.Sort(xIntSortBy(box))

	leftMinY, leftMaxY := box[0], box[1]
	rightMinY, rightMaxY := box[2], box[3]

	// Swap only on a strict inequality so ties keep their sorted order.
	if leftMinY[1] > leftMaxY[1] {
		leftMinY, leftMaxY = leftMaxY, leftMinY
	}
	if rightMinY[1] > rightMaxY[1] {
		rightMinY, rightMaxY = rightMaxY, rightMinY
	}

	return [][]int{leftMinY, rightMinY, rightMaxY, leftMaxY}
}
// filterTagDetRes normalises and filters detected boxes.
//
// Every box is first reordered with orderPointsClockwise and its coordinates
// are clamped to [0, oriW-1] and [0, oriH-1]; this rewrites the entries of
// boxes in place. A box is then kept only when both the edge from point 0 to
// point 1 and the edge from point 0 to point 3, each truncated to an integer
// length, are greater than 4.
func (d *DBPostProcess) filterTagDetRes(boxes [][][]int, oriH, oriW int) [][][]int {
	kept := make([][][]int, 0, len(boxes))
	maxX, maxY := oriW-1, oriH-1

	// First pass: canonical point order, then clamp into the image.
	for idx := range boxes {
		boxes[idx] = d.orderPointsClockwise(boxes[idx])
		for _, pt := range boxes[idx] {
			pt[0] = clip(pt[0], 0, maxX)
			pt[1] = clip(pt[1], 0, maxY)
		}
	}

	// edgeLen is the Euclidean distance between two points, truncated to int.
	edgeLen := func(p, q []int) int {
		return int(math.Sqrt(math.Pow(float64(p[0]-q[0]), 2.0) +
			math.Pow(float64(p[1]-q[1]), 2.0)))
	}

	// Second pass: discard boxes with a too-short side.
	for _, box := range boxes {
		rectW := edgeLen(box[0], box[1])
		rectH := edgeLen(box[0], box[3])
		if rectW > 4 && rectH > 4 {
			kept = append(kept, box)
		}
	}
	return kept
}
// Run converts the detector's output tensor into text boxes.
//
// The tensor value is asserted to be [][][][]float32 and only v[0][0] is
// read; shape[2] and shape[3] are taken as its height and width. Scores are
// copied into a float Mat, and scores strictly above d.thresh set the
// matching pixel of a binary map, which is dilated with a 2x2 rectangular
// kernel. Boxes are then extracted with boxesFromBitmap and cleaned up with
// filterTagDetRes, which clamps them to oriW-1 / oriH-1.
//
// Every Mat created here is closed before returning. The returned boxes hold
// plain ints, so they stay valid afterwards.
//
// Run panics if the tensor value is not a [][][][]float32.
func (d *DBPostProcess) Run(output *paddle.ZeroCopyTensor, oriH, oriW int, ratioH, ratioW float64) [][][]int {
	v := output.Value().([][][][]float32)

	shape := output.Shape()
	height, width := int(shape[2]), int(shape[3])

	pred := gocv.NewMatWithSize(height, width, gocv.MatTypeCV32F)
	defer pred.Close()
	bitmap := gocv.NewMatWithSize(height, width, gocv.MatTypeCV8UC1)
	defer bitmap.Close()

	thresh := float32(d.thresh)
	for i := 0; i < height; i++ {
		row := v[0][0][i]
		for j := 0; j < width; j++ {
			pred.SetFloatAt(i, j, row[j])
			if row[j] > thresh {
				bitmap.SetUCharAt(i, j, 1)
			} else {
				bitmap.SetUCharAt(i, j, 0)
			}
		}
	}

	mask := gocv.NewMat()
	defer mask.Close()
	kernel := gocv.GetStructuringElement(gocv.MorphRect, image.Point{X: 2, Y: 2})
	defer kernel.Close()
	gocv.Dilate(bitmap, &mask, kernel)

	boxes := d.boxesFromBitmap(pred, mask, ratioH, ratioW)
	return d.filterTagDetRes(boxes, oriH, oriW)
}