support log decoding

This commit is contained in:
kongfei 2022-08-19 15:46:43 +08:00
parent 2768920249
commit 74b8e7d6c1
3 changed files with 45 additions and 1 deletions

View File

@ -25,6 +25,16 @@ const (
UTF16BE string = "utf-16-be"
// UTF16LE for UTF-16 Little Endian encoding
UTF16LE string = "utf-16-le"
// Chinese character encodings. References:
// https://en.wikipedia.org/wiki/GB_2312
// https://en.wikipedia.org/wiki/GBK_(character_encoding)
// https://en.wikipedia.org/wiki/GB_18030
// https://en.wikipedia.org/wiki/Big5
GB18030 string = "gb18030"
GB2312 string = "gb2312"
HZGB2312 string = "hz-gb2312"
GBK string = "gbk"
BIG5 string = "big5"
)
// LogsConfig represents a log source config, which can be for instance

View File

@ -14,6 +14,7 @@ import (
"path/filepath"
"regexp"
"strconv"
"strings"
"sync/atomic"
"time"
@ -77,13 +78,26 @@ func NewDecoderFromSourceWithPattern(source *logsconfig.LogSource, multiLinePatt
// lineParser = docker.JSONParser
// matcher = &decoder.NewLineMatcher{}
default:
switch source.Config.Encoding {
switch strings.ToLower(source.Config.Encoding) {
case logsconfig.UTF16BE:
lineParser = parser.NewDecodingParser(parser.UTF16BE)
matcher = decoder.NewBytesSequenceMatcher(decoder.Utf16beEOL)
case logsconfig.UTF16LE:
lineParser = parser.NewDecodingParser(parser.UTF16LE)
matcher = decoder.NewBytesSequenceMatcher(decoder.Utf16leEOL)
case logsconfig.GB18030:
lineParser = parser.NewDecodingParser(parser.GBK18030)
matcher = &decoder.NewLineMatcher{}
case logsconfig.HZGB2312:
lineParser = parser.NewDecodingParser(parser.HZGB2312)
matcher = &decoder.NewLineMatcher{}
case logsconfig.GBK, logsconfig.GB2312:
lineParser = parser.NewDecodingParser(parser.GBK)
matcher = &decoder.NewLineMatcher{}
case logsconfig.BIG5:
lineParser = parser.NewDecodingParser(parser.BIG5)
matcher = &decoder.NewLineMatcher{}
default:
lineParser = parser.NoopParser
matcher = &decoder.NewLineMatcher{}

View File

@ -7,6 +7,8 @@ package parser
import (
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/simplifiedchinese"
"golang.org/x/text/encoding/traditionalchinese"
"golang.org/x/text/encoding/unicode"
"golang.org/x/text/transform"
)
@ -22,6 +24,16 @@ const (
UTF16LE = iota
// UTF16BE UTF16 big endian
UTF16BE
// GBK18030 GB18030 encoding (decoded with simplifiedchinese.GB18030)
GBK18030
// GB2312 GB2312 encoding (decoded with the GBK decoder, simplifiedchinese.GBK)
GB2312
// HZGB2312 HZ-GB2312 encoding (decoded with simplifiedchinese.HZGB2312)
HZGB2312
// GBK GBK encoding (decoded with simplifiedchinese.GBK)
GBK
// BIG5 Big5 encoding (decoded with traditionalchinese.Big5)
BIG5
)
// Parser parse messages
@ -67,6 +79,14 @@ func NewDecodingParser(e Encoding) *DecodingParser {
enc = unicode.UTF16(unicode.LittleEndian, unicode.UseBOM)
case UTF16BE:
enc = unicode.UTF16(unicode.BigEndian, unicode.UseBOM)
case GBK, GB2312:
enc = simplifiedchinese.GBK
case HZGB2312:
enc = simplifiedchinese.HZGB2312
case GBK18030:
enc = simplifiedchinese.GB18030
case BIG5:
enc = traditionalchinese.Big5
}
p.decoder = enc.NewDecoder()
return p