support log decoding
This commit is contained in:
parent
2768920249
commit
74b8e7d6c1
|
@ -25,6 +25,16 @@ const (
|
|||
UTF16BE string = "utf-16-be"
|
||||
// UTF16LE for UTF-16 Little Endian encoding
|
||||
UTF16LE string = "utf-16-le"
|
||||
|
||||
// https://en.wikipedia.org/wiki/GB_2312
|
||||
// https://en.wikipedia.org/wiki/GBK_(character_encoding)
|
||||
// https://en.wikipedia.org/wiki/GB_18030
|
||||
// https://en.wikipedia.org/wiki/Big5
|
||||
GB18030 string = "gb18030"
|
||||
GB2312 string = "gb2312"
|
||||
HZGB2312 string = "hz-gb2312"
|
||||
GBK string = "gbk"
|
||||
BIG5 string = "big5"
|
||||
)
|
||||
|
||||
// LogsConfig represents a log source config, which can be for instance
|
||||
|
|
|
@ -14,6 +14,7 @@ import (
|
|||
"path/filepath"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
|
@ -77,13 +78,26 @@ func NewDecoderFromSourceWithPattern(source *logsconfig.LogSource, multiLinePatt
|
|||
// lineParser = docker.JSONParser
|
||||
// matcher = &decoder.NewLineMatcher{}
|
||||
default:
|
||||
switch source.Config.Encoding {
|
||||
switch strings.ToLower(source.Config.Encoding) {
|
||||
case logsconfig.UTF16BE:
|
||||
lineParser = parser.NewDecodingParser(parser.UTF16BE)
|
||||
matcher = decoder.NewBytesSequenceMatcher(decoder.Utf16beEOL)
|
||||
case logsconfig.UTF16LE:
|
||||
lineParser = parser.NewDecodingParser(parser.UTF16LE)
|
||||
matcher = decoder.NewBytesSequenceMatcher(decoder.Utf16leEOL)
|
||||
case logsconfig.GB18030:
|
||||
lineParser = parser.NewDecodingParser(parser.GBK18030)
|
||||
matcher = &decoder.NewLineMatcher{}
|
||||
case logsconfig.HZGB2312:
|
||||
lineParser = parser.NewDecodingParser(parser.HZGB2312)
|
||||
matcher = &decoder.NewLineMatcher{}
|
||||
case logsconfig.GBK, logsconfig.GB2312:
|
||||
lineParser = parser.NewDecodingParser(parser.GBK)
|
||||
matcher = &decoder.NewLineMatcher{}
|
||||
case logsconfig.BIG5:
|
||||
lineParser = parser.NewDecodingParser(parser.BIG5)
|
||||
matcher = &decoder.NewLineMatcher{}
|
||||
|
||||
default:
|
||||
lineParser = parser.NoopParser
|
||||
matcher = &decoder.NewLineMatcher{}
|
||||
|
|
|
@ -7,6 +7,8 @@ package parser
|
|||
|
||||
import (
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/encoding/simplifiedchinese"
|
||||
"golang.org/x/text/encoding/traditionalchinese"
|
||||
"golang.org/x/text/encoding/unicode"
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
@ -22,6 +24,16 @@ const (
|
|||
UTF16LE = iota
|
||||
// UTF16BE UTF16 big endian
|
||||
UTF16BE
|
||||
// GBK18030 GB 18030 simplified Chinese encoding
|
||||
GBK18030
|
||||
// GB2312 GB 2312 simplified Chinese encoding (decoded with the GBK decoder)
|
||||
GB2312
|
||||
// HZGB2312 HZ-GB2312 simplified Chinese encoding
|
||||
HZGB2312
|
||||
// GBK GBK simplified Chinese encoding
|
||||
GBK
|
||||
// BIG5 Big5 traditional Chinese encoding
|
||||
BIG5
|
||||
)
|
||||
|
||||
// Parser parse messages
|
||||
|
@ -67,6 +79,14 @@ func NewDecodingParser(e Encoding) *DecodingParser {
|
|||
enc = unicode.UTF16(unicode.LittleEndian, unicode.UseBOM)
|
||||
case UTF16BE:
|
||||
enc = unicode.UTF16(unicode.BigEndian, unicode.UseBOM)
|
||||
case GBK, GB2312:
|
||||
enc = simplifiedchinese.GBK
|
||||
case HZGB2312:
|
||||
enc = simplifiedchinese.HZGB2312
|
||||
case GBK18030:
|
||||
enc = simplifiedchinese.GB18030
|
||||
case BIG5:
|
||||
enc = traditionalchinese.Big5
|
||||
}
|
||||
p.decoder = enc.NewDecoder()
|
||||
return p
|
||||
|
|
Loading…
Reference in New Issue