268 lines
8.1 KiB
Go
268 lines
8.1 KiB
Go
// Unless explicitly stated otherwise all files in this repository are licensed
|
|
// under the Apache License Version 2.0.
|
|
// This product includes software developed at Datadog (https://www.datadoghq.com/).
|
|
// Copyright 2016-present Datadog, Inc.
|
|
|
|
package decoder
|
|
|
|
import (
|
|
"bytes"
|
|
"log"
|
|
"regexp"
|
|
"sync/atomic"
|
|
"time"
|
|
|
|
config "flashcat.cloud/categraf/config/logs"
|
|
"flashcat.cloud/categraf/logs/parser"
|
|
)
|
|
|
|
// defaultContentLenLimit represents the max size for a line,
|
|
// if a line is bigger than this limit, it will be truncated.
|
|
const defaultContentLenLimit = 256 * 1000
|
|
|
|
// Input represents a chunk of line.
|
|
type Input struct {
|
|
content []byte
|
|
}
|
|
|
|
// NewInput returns a new input.
|
|
func NewInput(content []byte) *Input {
|
|
return &Input{
|
|
content: content,
|
|
}
|
|
}
|
|
|
|
// DecodedInput represents a decoded line and the raw length
|
|
type DecodedInput struct {
|
|
content []byte
|
|
rawDataLen int
|
|
}
|
|
|
|
// NewDecodedInput returns a new decoded input.
|
|
func NewDecodedInput(content []byte, rawDataLen int) *DecodedInput {
|
|
return &DecodedInput{
|
|
content: content,
|
|
rawDataLen: rawDataLen,
|
|
}
|
|
}
|
|
|
|
// Message represents a structured line.
|
|
type Message struct {
|
|
Content []byte
|
|
Status string
|
|
RawDataLen int
|
|
Timestamp string
|
|
IngestionTimestamp int64
|
|
}
|
|
|
|
// NewMessage returns a new output.
|
|
func NewMessage(content []byte, status string, rawDataLen int, timestamp string) *Message {
|
|
return &Message{
|
|
Content: content,
|
|
Status: status,
|
|
RawDataLen: rawDataLen,
|
|
Timestamp: timestamp,
|
|
IngestionTimestamp: time.Now().UnixNano(),
|
|
}
|
|
}
|
|
|
|
// Decoder splits raw data into lines and passes them to a lineParser that passes them to
|
|
// a lineHandler that emits outputs
|
|
// Input->[decoder]->[parser]->[handler]->Message
|
|
type Decoder struct {
|
|
// The number of raw lines decoded from the input before they are processed.
|
|
// Needs to be first to ensure 64 bit alignment
|
|
linesDecoded int64
|
|
|
|
InputChan chan *Input
|
|
OutputChan chan *Message
|
|
matcher EndLineMatcher
|
|
lineBuffer *bytes.Buffer
|
|
lineParser LineParser
|
|
contentLenLimit int
|
|
rawDataLen int
|
|
|
|
// The decoder holds on to an instace of DetectedPattern which is a thread safe container used to
|
|
// pass a multiline pattern up from the line handler in order to surface it to the tailer.
|
|
// The tailer uses this to determine if a pattern should be reused when a file rotates.
|
|
detectedPattern *DetectedPattern
|
|
}
|
|
|
|
// InitializeDecoder returns a properly initialized Decoder
|
|
func InitializeDecoder(source *config.LogSource, parser parser.Parser) *Decoder {
|
|
return NewDecoderWithEndLineMatcher(source, parser, &NewLineMatcher{}, nil)
|
|
}
|
|
|
|
// NewDecoderWithEndLineMatcher initialize a decoder with given endline strategy.
|
|
func NewDecoderWithEndLineMatcher(source *config.LogSource, parser parser.Parser, matcher EndLineMatcher, multiLinePattern *regexp.Regexp) *Decoder {
|
|
inputChan := make(chan *Input)
|
|
outputChan := make(chan *Message)
|
|
lineLimit := defaultContentLenLimit
|
|
var lineHandler LineHandler
|
|
var lineParser LineParser
|
|
detectedPattern := &DetectedPattern{}
|
|
|
|
for _, rule := range source.Config.ProcessingRules {
|
|
if rule.Type == config.MultiLine {
|
|
lh := NewMultiLineHandler(outputChan, rule.Regex, config.AggregationTimeout(), lineLimit)
|
|
|
|
// Since a single source can have multiple file tailers - each with their own decoder instance,
|
|
// Make sure we keep track of the multiline match count info from all of the decoders so the
|
|
// status page displays it correctly.
|
|
if existingInfo, ok := source.GetInfo(lh.countInfo.InfoKey()).(*config.CountInfo); ok {
|
|
// override the new decoders info to the instance we are already using
|
|
lh.countInfo = existingInfo
|
|
} else {
|
|
// this is the first decoder we have seen for this source - use it's count info
|
|
source.RegisterInfo(lh.countInfo)
|
|
}
|
|
lineHandler = lh
|
|
}
|
|
}
|
|
if lineHandler == nil {
|
|
// TODO configure multiline
|
|
if source.Config.AutoMultiLine {
|
|
log.Println("Auto multi line log detection enabled")
|
|
|
|
if multiLinePattern != nil {
|
|
log.Println("Found a previously detected pattern - using multiline handler")
|
|
|
|
// Save the pattern again for the next rotation
|
|
detectedPattern.Set(multiLinePattern)
|
|
|
|
lineHandler = NewMultiLineHandler(outputChan, multiLinePattern, config.AggregationTimeout(), lineLimit)
|
|
} else {
|
|
lineHandler = buildAutoMultilineHandlerFromConfig(outputChan, lineLimit, source, detectedPattern)
|
|
}
|
|
} else {
|
|
lineHandler = NewSingleLineHandler(outputChan, lineLimit)
|
|
}
|
|
}
|
|
|
|
if parser.SupportsPartialLine() {
|
|
lineParser = NewMultiLineParser(config.AggregationTimeout(), parser, lineHandler, lineLimit)
|
|
} else {
|
|
lineParser = NewSingleLineParser(parser, lineHandler)
|
|
}
|
|
|
|
return New(inputChan, outputChan, lineParser, lineLimit, matcher, detectedPattern)
|
|
}
|
|
|
|
func buildAutoMultilineHandlerFromConfig(outputChan chan *Message, lineLimit int, source *config.LogSource, detectedPattern *DetectedPattern) *AutoMultilineHandler {
|
|
linesToSample := source.Config.AutoMultiLineSampleSize
|
|
if linesToSample <= 0 {
|
|
linesToSample = 500 // TODO
|
|
}
|
|
matchThreshold := source.Config.AutoMultiLineMatchThreshold
|
|
if matchThreshold == 0 {
|
|
matchThreshold = 0.48 // TODO
|
|
}
|
|
additionalPatterns := []string{} // TODO
|
|
additionalPatternsCompiled := []*regexp.Regexp{}
|
|
|
|
for _, p := range additionalPatterns {
|
|
compiled, err := regexp.Compile("^" + p)
|
|
if err != nil {
|
|
log.Println("logs_config.auto_multi_line_extra_patterns containing value: ", p, " is not a valid regular expression")
|
|
continue
|
|
}
|
|
additionalPatternsCompiled = append(additionalPatternsCompiled, compiled)
|
|
}
|
|
|
|
matchTimeout := time.Second * time.Duration(30)
|
|
return NewAutoMultilineHandler(outputChan,
|
|
lineLimit,
|
|
linesToSample,
|
|
matchThreshold,
|
|
matchTimeout,
|
|
config.AggregationTimeout(),
|
|
source,
|
|
additionalPatternsCompiled,
|
|
detectedPattern)
|
|
}
|
|
|
|
// New returns an initialized Decoder
|
|
func New(InputChan chan *Input, OutputChan chan *Message, lineParser LineParser, contentLenLimit int, matcher EndLineMatcher, detectedPattern *DetectedPattern) *Decoder {
|
|
var lineBuffer bytes.Buffer
|
|
return &Decoder{
|
|
InputChan: InputChan,
|
|
OutputChan: OutputChan,
|
|
lineBuffer: &lineBuffer,
|
|
lineParser: lineParser,
|
|
contentLenLimit: contentLenLimit,
|
|
matcher: matcher,
|
|
detectedPattern: detectedPattern,
|
|
}
|
|
}
|
|
|
|
// Start starts the Decoder
|
|
func (d *Decoder) Start() {
|
|
d.lineParser.Start()
|
|
go d.run()
|
|
}
|
|
|
|
// Stop stops the Decoder
|
|
func (d *Decoder) Stop() {
|
|
close(d.InputChan)
|
|
}
|
|
|
|
// GetLineCount returns the number of decoded lines
|
|
func (d *Decoder) GetLineCount() int64 {
|
|
return atomic.LoadInt64(&d.linesDecoded)
|
|
}
|
|
|
|
// GetDetectedPattern returns a detected pattern (if any)
|
|
func (d *Decoder) GetDetectedPattern() *regexp.Regexp {
|
|
if d.detectedPattern == nil {
|
|
return nil
|
|
}
|
|
return d.detectedPattern.Get()
|
|
}
|
|
|
|
// run lets the Decoder handle data coming from InputChan
|
|
func (d *Decoder) run() {
|
|
for data := range d.InputChan {
|
|
d.decodeIncomingData(data.content)
|
|
}
|
|
// finish to stop decoder
|
|
d.lineParser.Stop()
|
|
}
|
|
|
|
// decodeIncomingData splits raw data based on '\n', creates and processes new lines
|
|
func (d *Decoder) decodeIncomingData(inBuf []byte) {
|
|
i, j := 0, 0
|
|
n := len(inBuf)
|
|
maxj := d.contentLenLimit - d.lineBuffer.Len()
|
|
|
|
for ; j < n; j++ {
|
|
if j == maxj {
|
|
// send line because it is too long
|
|
d.lineBuffer.Write(inBuf[i:j])
|
|
d.rawDataLen += (j - i)
|
|
d.sendLine()
|
|
i = j
|
|
maxj = i + d.contentLenLimit
|
|
} else if d.matcher.Match(d.lineBuffer.Bytes(), inBuf, i, j) {
|
|
d.lineBuffer.Write(inBuf[i:j])
|
|
d.rawDataLen += (j - i)
|
|
d.rawDataLen++ // account for the matching byte
|
|
d.sendLine()
|
|
i = j + 1 // skip the last bytes of the matched sequence
|
|
maxj = i + d.contentLenLimit
|
|
}
|
|
}
|
|
d.lineBuffer.Write(inBuf[i:j])
|
|
d.rawDataLen += (j - i)
|
|
}
|
|
|
|
// sendLine copies content from lineBuffer which is passed to lineHandler
|
|
func (d *Decoder) sendLine() {
|
|
// Account for longer-than-1-byte line separator
|
|
content := make([]byte, d.lineBuffer.Len()-(d.matcher.SeparatorLen()-1))
|
|
copy(content, d.lineBuffer.Bytes())
|
|
d.lineBuffer.Reset()
|
|
d.lineParser.Handle(NewDecodedInput(content, d.rawDataLen))
|
|
d.rawDataLen = 0
|
|
atomic.AddInt64(&d.linesDecoded, 1)
|
|
}
|