mirror of https://gitee.com/answerdev/answer.git
feat: get text excerpt
This commit is contained in:
parent
f2ac965be3
commit
8f567e0abd
1
go.mod
1
go.mod
|
@ -15,6 +15,7 @@ require (
|
|||
github.com/goccy/go-json v0.9.11
|
||||
github.com/google/uuid v1.3.0
|
||||
github.com/google/wire v0.5.0
|
||||
github.com/grokify/html-strip-tags-go v0.0.1
|
||||
github.com/jinzhu/copier v0.3.5
|
||||
github.com/jinzhu/now v1.1.5
|
||||
github.com/lib/pq v1.10.7
|
||||
|
|
2
go.sum
2
go.sum
|
@ -299,6 +299,8 @@ github.com/gorilla/context v1.1.1/go.mod h1:kBGZzfjB9CEq2AlWe17Uuf7NDRt0dE0s8S51
|
|||
github.com/gorilla/mux v1.6.2/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs=
|
||||
github.com/gorilla/mux v1.7.3/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs=
|
||||
github.com/gorilla/websocket v0.0.0-20170926233335-4201258b820c/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ=
|
||||
github.com/grokify/html-strip-tags-go v0.0.1 h1:0fThFwLbW7P/kOiTBs03FsJSV9RM2M/Q/MOnCQxKMo0=
|
||||
github.com/grokify/html-strip-tags-go v0.0.1/go.mod h1:2Su6romC5/1VXOQMaWL2yb618ARB8iVo6/DR99A6d78=
|
||||
github.com/grpc-ecosystem/go-grpc-middleware v1.0.1-0.20190118093823-f849b5445de4/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs=
|
||||
github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk=
|
||||
github.com/grpc-ecosystem/grpc-gateway v1.9.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY=
|
||||
|
|
|
@ -0,0 +1,44 @@
|
|||
package htmltext
|
||||
|
||||
import (
|
||||
"github.com/grokify/html-strip-tags-go"
|
||||
"regexp"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// ClearText clear HTML, get the clear text
|
||||
func ClearText(html string) (text string) {
|
||||
var (
|
||||
re *regexp.Regexp
|
||||
codeReg = `(?ism)<(pre)>.*<\/pre>`
|
||||
codeRepl = "{code...}"
|
||||
linkReg = `(?ism)<a.*?[^<]>.*?<\/a>`
|
||||
linkRepl = "[link]"
|
||||
spaceReg = ` +`
|
||||
spaceRepl = " "
|
||||
)
|
||||
re = regexp.MustCompile(codeReg)
|
||||
html = re.ReplaceAllString(html, codeRepl)
|
||||
|
||||
re = regexp.MustCompile(linkReg)
|
||||
html = re.ReplaceAllString(html, linkRepl)
|
||||
|
||||
text = strings.NewReplacer(
|
||||
"\n", " ",
|
||||
"\r", " ",
|
||||
"\t", " ",
|
||||
).Replace(strip.StripTags(html))
|
||||
|
||||
// replace multiple spaces to one space
|
||||
re = regexp.MustCompile(spaceReg)
|
||||
text = strings.TrimSpace(re.ReplaceAllString(text, spaceRepl))
|
||||
return
|
||||
}
|
||||
|
||||
// FetchExcerpt return the excerpt from the HTML string
|
||||
func FetchExcerpt(html, trimMarker string, limit int) (text string) {
|
||||
text = ClearText(html)
|
||||
runeText := []rune(text)
|
||||
text = string(runeText[0:limit])
|
||||
return
|
||||
}
|
|
@ -0,0 +1,51 @@
|
|||
package htmltext
|
||||
|
||||
import (
|
||||
"github.com/stretchr/testify/assert"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestClearText(t *testing.T) {
|
||||
var (
|
||||
expected,
|
||||
clearedText string
|
||||
)
|
||||
|
||||
// test code clear text
|
||||
expected = "hello{code...}"
|
||||
clearedText = ClearText("<p>hello<pre>var a = \"good\"</pre></p>")
|
||||
assert.Equal(t, expected, clearedText)
|
||||
|
||||
// test link clear text
|
||||
expected = "hello[link]"
|
||||
clearedText = ClearText("<p>hello<a href=\"http://example.com/\">example.com</a></p>")
|
||||
assert.Equal(t, expected, clearedText)
|
||||
clearedText = ClearText("<p>hello<a href=\"https://example.com/\">example.com</a></p>")
|
||||
assert.Equal(t, expected, clearedText)
|
||||
|
||||
expected = "hello world"
|
||||
clearedText = ClearText("<div> hello</div>\n<div>world</div>")
|
||||
assert.Equal(t, expected, clearedText)
|
||||
}
|
||||
|
||||
func TestFetchExcerpt(t *testing.T) {
|
||||
var (
|
||||
expected,
|
||||
text string
|
||||
)
|
||||
|
||||
// test english string
|
||||
expected = "hello"
|
||||
text = FetchExcerpt("<p>hello world</p>", "...", 5)
|
||||
assert.Equal(t, expected, text)
|
||||
|
||||
// test mixed string
|
||||
expected = "hello你好"
|
||||
text = FetchExcerpt("<p>hello你好world</p>", "...", 7)
|
||||
assert.Equal(t, expected, text)
|
||||
|
||||
// test mixed string with emoticon
|
||||
expected = "hello你好😂"
|
||||
text = FetchExcerpt("<p>hello你好😂world</p>", "...", 8)
|
||||
assert.Equal(t, expected, text)
|
||||
}
|
Loading…
Reference in New Issue