mirror of https://gitee.com/answerdev/answer.git
feat: add html text cut pkg
This commit is contained in:
parent
b766824a4e
commit
3d95d03911
3
go.mod
3
go.mod
|
@ -15,6 +15,7 @@ require (
|
|||
github.com/goccy/go-json v0.9.11
|
||||
github.com/google/uuid v1.3.0
|
||||
github.com/google/wire v0.5.0
|
||||
github.com/grokify/html-strip-tags-go v0.0.1
|
||||
github.com/jinzhu/copier v0.3.5
|
||||
github.com/jinzhu/now v1.1.5
|
||||
github.com/lib/pq v1.10.7
|
||||
|
@ -35,6 +36,7 @@ require (
|
|||
golang.org/x/crypto v0.1.0
|
||||
golang.org/x/net v0.1.0
|
||||
gopkg.in/gomail.v2 v2.0.0-20160411212932-81ebce5c23df
|
||||
gopkg.in/yaml.v3 v3.0.1
|
||||
xorm.io/builder v0.3.12
|
||||
xorm.io/core v0.7.3
|
||||
xorm.io/xorm v1.3.2
|
||||
|
@ -110,6 +112,5 @@ require (
|
|||
gopkg.in/alexcesaro/quotedprintable.v3 v3.0.0-20150716171945-2caba252f4dc // indirect
|
||||
gopkg.in/ini.v1 v1.67.0 // indirect
|
||||
gopkg.in/yaml.v2 v2.4.0 // indirect
|
||||
gopkg.in/yaml.v3 v3.0.1 // indirect
|
||||
sigs.k8s.io/yaml v1.3.0 // indirect
|
||||
)
|
||||
|
|
4
go.sum
4
go.sum
|
@ -299,6 +299,8 @@ github.com/gorilla/context v1.1.1/go.mod h1:kBGZzfjB9CEq2AlWe17Uuf7NDRt0dE0s8S51
|
|||
github.com/gorilla/mux v1.6.2/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs=
|
||||
github.com/gorilla/mux v1.7.3/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs=
|
||||
github.com/gorilla/websocket v0.0.0-20170926233335-4201258b820c/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ=
|
||||
github.com/grokify/html-strip-tags-go v0.0.1 h1:0fThFwLbW7P/kOiTBs03FsJSV9RM2M/Q/MOnCQxKMo0=
|
||||
github.com/grokify/html-strip-tags-go v0.0.1/go.mod h1:2Su6romC5/1VXOQMaWL2yb618ARB8iVo6/DR99A6d78=
|
||||
github.com/grpc-ecosystem/go-grpc-middleware v1.0.1-0.20190118093823-f849b5445de4/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs=
|
||||
github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk=
|
||||
github.com/grpc-ecosystem/grpc-gateway v1.9.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY=
|
||||
|
@ -594,8 +596,6 @@ github.com/segmentfault/pacman/contrib/cache/memory v0.0.0-20221018072427-a15dd1
|
|||
github.com/segmentfault/pacman/contrib/cache/memory v0.0.0-20221018072427-a15dd1434e05/go.mod h1:rmf1TCwz67dyM+AmTwSd1BxTo2AOYHj262lP93bOZbs=
|
||||
github.com/segmentfault/pacman/contrib/conf/viper v0.0.0-20221018072427-a15dd1434e05 h1:BlqTgc3/MYKG6vMI2MI+6o+7P4Gy5PXlawu185wPXAk=
|
||||
github.com/segmentfault/pacman/contrib/conf/viper v0.0.0-20221018072427-a15dd1434e05/go.mod h1:prPjFam7MyZ5b3S9dcDOt2tMPz6kf7C9c243s9zSwPY=
|
||||
github.com/segmentfault/pacman/contrib/i18n v0.0.0-20221018072427-a15dd1434e05 h1:gFCY9KUxhYg+/MXNcDYl4ILK+R1SG78FtaSR3JqZNYY=
|
||||
github.com/segmentfault/pacman/contrib/i18n v0.0.0-20221018072427-a15dd1434e05/go.mod h1:5Afm+OQdau/HQqSOp/ALlSUp0vZsMMMbv//kJhxuoi8=
|
||||
github.com/segmentfault/pacman/contrib/i18n v0.0.0-20221109042453-26158da67632 h1:so07u8RWXZQ0gz30KXJ9MKtQ5zjgcDlQ/UwFZrwm5b0=
|
||||
github.com/segmentfault/pacman/contrib/i18n v0.0.0-20221109042453-26158da67632/go.mod h1:5Afm+OQdau/HQqSOp/ALlSUp0vZsMMMbv//kJhxuoi8=
|
||||
github.com/segmentfault/pacman/contrib/log/zap v0.0.0-20221018072427-a15dd1434e05 h1:jcGZU2juv0L3eFEkuZYV14ESLUlWfGMWnP0mjOfrSZc=
|
||||
|
|
|
@ -0,0 +1,61 @@
|
|||
package htmltext
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/grokify/html-strip-tags-go"
|
||||
)
|
||||
|
||||
// ClearText clear HTML, get the clear text
|
||||
func ClearText(html string) (text string) {
|
||||
if len(html) == 0 {
|
||||
text = html
|
||||
return
|
||||
}
|
||||
|
||||
var (
|
||||
re *regexp.Regexp
|
||||
codeReg = `(?ism)<(pre)>.*<\/pre>`
|
||||
codeRepl = "{code...}"
|
||||
linkReg = `(?ism)<a.*?[^<]>.*?<\/a>`
|
||||
linkRepl = "[link]"
|
||||
spaceReg = ` +`
|
||||
spaceRepl = " "
|
||||
)
|
||||
re = regexp.MustCompile(codeReg)
|
||||
html = re.ReplaceAllString(html, codeRepl)
|
||||
|
||||
re = regexp.MustCompile(linkReg)
|
||||
html = re.ReplaceAllString(html, linkRepl)
|
||||
|
||||
text = strings.NewReplacer(
|
||||
"\n", " ",
|
||||
"\r", " ",
|
||||
"\t", " ",
|
||||
).Replace(strip.StripTags(html))
|
||||
|
||||
// replace multiple spaces to one space
|
||||
re = regexp.MustCompile(spaceReg)
|
||||
text = strings.TrimSpace(re.ReplaceAllString(text, spaceRepl))
|
||||
return
|
||||
}
|
||||
|
||||
// FetchExcerpt return the excerpt from the HTML string
|
||||
func FetchExcerpt(html, trimMarker string, limit int) (text string) {
|
||||
if len(html) == 0 {
|
||||
text = html
|
||||
return
|
||||
}
|
||||
|
||||
text = ClearText(html)
|
||||
runeText := []rune(text)
|
||||
if len(runeText) <= limit {
|
||||
text = string(runeText)
|
||||
} else {
|
||||
text = string(runeText[0:limit])
|
||||
}
|
||||
|
||||
text += trimMarker
|
||||
return
|
||||
}
|
|
@ -0,0 +1,52 @@
|
|||
package htmltext
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestClearText(t *testing.T) {
|
||||
var (
|
||||
expected,
|
||||
clearedText string
|
||||
)
|
||||
|
||||
// test code clear text
|
||||
expected = "hello{code...}"
|
||||
clearedText = ClearText("<p>hello<pre>var a = \"good\"</pre></p>")
|
||||
assert.Equal(t, expected, clearedText)
|
||||
|
||||
// test link clear text
|
||||
expected = "hello[link]"
|
||||
clearedText = ClearText("<p>hello<a href=\"http://example.com/\">example.com</a></p>")
|
||||
assert.Equal(t, expected, clearedText)
|
||||
clearedText = ClearText("<p>hello<a href=\"https://example.com/\">example.com</a></p>")
|
||||
assert.Equal(t, expected, clearedText)
|
||||
|
||||
expected = "hello world"
|
||||
clearedText = ClearText("<div> hello</div>\n<div>world</div>")
|
||||
assert.Equal(t, expected, clearedText)
|
||||
}
|
||||
|
||||
func TestFetchExcerpt(t *testing.T) {
|
||||
var (
|
||||
expected,
|
||||
text string
|
||||
)
|
||||
|
||||
// test english string
|
||||
expected = "hello..."
|
||||
text = FetchExcerpt("<p>hello world</p>", "...", 5)
|
||||
assert.Equal(t, expected, text)
|
||||
|
||||
// test mixed string
|
||||
expected = "hello你好..."
|
||||
text = FetchExcerpt("<p>hello你好world</p>", "...", 7)
|
||||
assert.Equal(t, expected, text)
|
||||
|
||||
// test mixed string with emoticon
|
||||
expected = "hello你好😂..."
|
||||
text = FetchExcerpt("<p>hello你好😂world</p>", "...", 8)
|
||||
assert.Equal(t, expected, text)
|
||||
}
|
Loading…
Reference in New Issue