diff --git a/config/logs/integration_config.go b/config/logs/integration_config.go index 60dcfcf..096d58b 100644 --- a/config/logs/integration_config.go +++ b/config/logs/integration_config.go @@ -25,6 +25,16 @@ const ( UTF16BE string = "utf-16-be" // UTF16LE for UTF-16 Little Endian encoding UTF16LE string = "utf-16-le" + + // https://en.wikipedia.org/wiki/GB_2312 + // https://en.wikipedia.org/wiki/GBK_(character_encoding) + // https://en.wikipedia.org/wiki/GB_18030 + // https://en.wikipedia.org/wiki/Big5 + GB18030 string = "gb18030" // GB18030 for GB 18030 encoding + GB2312 string = "gb2312" // GB2312 for GB 2312 encoding + HZGB2312 string = "hz-gb2312" // HZGB2312 for HZ-GB2312 encoding + GBK string = "gbk" // GBK for GBK encoding + BIG5 string = "big5" // BIG5 for Big5 encoding ) // LogsConfig represents a log source config, which can be for instance diff --git a/logs/input/file/tailer.go b/logs/input/file/tailer.go index 9c5e96a..efc5a6c 100644 --- a/logs/input/file/tailer.go +++ b/logs/input/file/tailer.go @@ -14,6 +14,7 @@ import ( "path/filepath" "regexp" "strconv" + "strings" "sync/atomic" "time" @@ -77,13 +78,26 @@ func NewDecoderFromSourceWithPattern(source *logsconfig.LogSource, multiLinePatt // lineParser = docker.JSONParser // matcher = &decoder.NewLineMatcher{} default: - switch source.Config.Encoding { + switch strings.ToLower(source.Config.Encoding) { case logsconfig.UTF16BE: lineParser = parser.NewDecodingParser(parser.UTF16BE) matcher = decoder.NewBytesSequenceMatcher(decoder.Utf16beEOL) case logsconfig.UTF16LE: lineParser = parser.NewDecodingParser(parser.UTF16LE) matcher = decoder.NewBytesSequenceMatcher(decoder.Utf16leEOL) + case logsconfig.GB18030: + lineParser = parser.NewDecodingParser(parser.GBK18030) + matcher = &decoder.NewLineMatcher{} + case logsconfig.HZGB2312: + lineParser = parser.NewDecodingParser(parser.HZGB2312) + matcher = &decoder.NewLineMatcher{} + case logsconfig.GBK, logsconfig.GB2312: // both configured names select the GBK decoding parser + lineParser = parser.NewDecodingParser(parser.GBK) + matcher = &decoder.NewLineMatcher{} + case logsconfig.BIG5: + lineParser = parser.NewDecodingParser(parser.BIG5) + matcher = 
&decoder.NewLineMatcher{} + default: lineParser = parser.NoopParser matcher = &decoder.NewLineMatcher{} diff --git a/logs/parser/parser.go b/logs/parser/parser.go index 27fd1b3..c7d58b3 100644 --- a/logs/parser/parser.go +++ b/logs/parser/parser.go @@ -7,6 +7,8 @@ package parser import ( "golang.org/x/text/encoding" + "golang.org/x/text/encoding/simplifiedchinese" + "golang.org/x/text/encoding/traditionalchinese" "golang.org/x/text/encoding/unicode" "golang.org/x/text/transform" ) @@ -22,6 +24,16 @@ const ( UTF16LE = iota // UTF16BE UTF16 big endian UTF16BE + // GBK18030 GB 18030 encoding, decoded with simplifiedchinese.GB18030. NOTE(review): the standard is spelled GB 18030 without a K; consider renaming to GB18030 before this identifier gains callers + GBK18030 + // GB2312 GB 2312 encoding, decoded with the same simplifiedchinese.GBK decoder as GBK + GB2312 + // HZGB2312 HZ-GB2312 encoding, decoded with simplifiedchinese.HZGB2312 + HZGB2312 + // GBK GBK encoding, decoded with simplifiedchinese.GBK + GBK + // BIG5 Big5 encoding, decoded with traditionalchinese.Big5 + BIG5 ) // Parser parse messages @@ -67,6 +79,14 @@ func NewDecodingParser(e Encoding) *DecodingParser { enc = unicode.UTF16(unicode.LittleEndian, unicode.UseBOM) case UTF16BE: enc = unicode.UTF16(unicode.BigEndian, unicode.UseBOM) + case GBK, GB2312: // GB2312 has no decoder of its own here; it shares the GBK one + enc = simplifiedchinese.GBK + case HZGB2312: + enc = simplifiedchinese.HZGB2312 + case GBK18030: + enc = simplifiedchinese.GB18030 + case BIG5: // NOTE(review): no default case is visible in this hunk; confirm enc cannot still be nil when enc.NewDecoder() runs below + enc = traditionalchinese.Big5 } p.decoder = enc.NewDecoder() return p