nightingale1/vendor/github.com/lightstep/lightstep-tracer-go/tracer.go

448 lines
13 KiB
Go

// Package lightstep implements the LightStep OpenTracing client for Go.
package lightstep
import (
"context"
"fmt"
"runtime"
"sync"
"time"
"github.com/opentracing/opentracing-go"
)
// Tracer extends the `opentracing.Tracer` interface with methods for manual
// flushing and closing. To access these methods, you can take the global
// tracer and typecast it to a `lightstep.Tracer`. As a convenience, the
// lightstep package provides static functions which perform the typecasting.
type Tracer interface {
opentracing.Tracer
// Close flushes and then terminates the LightStep collector
Close(context.Context)
// Flush sends all spans currently in the buffer to the LighStep collector
Flush(context.Context)
// Options gets the Options used in New() or NewWithOptions().
Options() Options
// Disable prevents the tracer from recording spans or flushing
Disable()
}
// Implements the `Tracer` interface. Buffers spans and forwards to a Lightstep collector.
type tracerImpl struct {
//////////////////////////////////////////////////////////////
// IMMUTABLE IMMUTABLE IMMUTABLE IMMUTABLE IMMUTABLE IMMUTABLE
//////////////////////////////////////////////////////////////
// Note: there may be a desire to update some of these fields
// at runtime, in which case suitable changes may be needed
// for variables accessed during Flush.
reporterID uint64 // the LightStep tracer guid
opts Options
// report loop management
closeOnce sync.Once
closeReportLoopChannel chan struct{}
reportLoopClosedChannel chan struct{}
converter *protoConverter
accessToken string
attributes map[string]string
//////////////////////////////////////////////////////////
// MUTABLE MUTABLE MUTABLE MUTABLE MUTABLE MUTABLE MUTABLE
//////////////////////////////////////////////////////////
// the following fields are modified under `lock`.
lock sync.Mutex
// Remote service that will receive reports.
client collectorClient
connection Connection
// Two buffers of data.
buffer reportBuffer
flushing reportBuffer
// Flush state.
flushingLock sync.Mutex
reportInFlight bool
lastReportAttempt time.Time
// Meta Event Reporting can be enabled at tracer creation or on-demand by satellite
metaEventReportingEnabled bool
// Set to true on first report
firstReportHasRun bool
// We allow our remote peer to disable this instrumentation at any
// time, turning all potentially costly runtime operations into
// no-ops.
//
// TODO this should use atomic load/store to test disabled
// prior to taking the lock, do please.
disabled bool
// Map of propagators used to determine the correct propagator to use
// based on the format passed into Inject/Extract. Supports one
// propagator for each of the formats: TextMap, HTTPHeaders, Binary
propagators map[opentracing.BuiltinFormat]Propagator
}
// NewTracer creates and starts a new Lightstep Tracer.
// In case of error, we emit event and return nil.
func NewTracer(opts Options) Tracer {
tr, err := CreateTracer(opts)
if err != nil {
emitEvent(newEventStartError(err))
return nil
}
return tr
}
// CreateTracer creates and starts a new Lightstep Tracer.
// It is meant to replace NewTracer which does not propagate the error.
func CreateTracer(opts Options) (Tracer, error) {
if err := opts.Initialize(); err != nil {
return nil, fmt.Errorf("init; err: %v", err)
}
attributes := map[string]string{}
for k, v := range opts.Tags {
attributes[k] = fmt.Sprint(v)
}
// Don't let the GrpcOptions override these values. That would be confusing.
attributes[TracerPlatformKey] = TracerPlatformValue
attributes[TracerPlatformVersionKey] = runtime.Version()
attributes[TracerVersionKey] = TracerVersionValue
now := time.Now()
impl := &tracerImpl{
opts: opts,
reporterID: genSeededGUID(),
buffer: newSpansBuffer(opts.MaxBufferedSpans),
flushing: newSpansBuffer(opts.MaxBufferedSpans),
closeReportLoopChannel: make(chan struct{}),
reportLoopClosedChannel: make(chan struct{}),
converter: newProtoConverter(opts),
accessToken: opts.AccessToken,
attributes: attributes,
}
impl.buffer.setCurrent(now)
var err error
impl.client, err = newCollectorClient(opts)
if err != nil {
return nil, fmt.Errorf("create collector client; err: %v", err)
}
conn, err := impl.client.ConnectClient()
if err != nil {
return nil, err
}
impl.connection = conn
// set meta reporting to defined option
impl.metaEventReportingEnabled = opts.MetaEventReportingEnabled
impl.firstReportHasRun = false
go impl.reportLoop()
impl.propagators = map[opentracing.BuiltinFormat]Propagator{
opentracing.TextMap: LightStepPropagator,
opentracing.HTTPHeaders: LightStepPropagator,
opentracing.Binary: BinaryPropagator,
}
for builtin, propagator := range opts.Propagators {
impl.propagators[builtin] = propagator
}
return impl, nil
}
func (tracer *tracerImpl) Options() Options {
return tracer.opts
}
func (tracer *tracerImpl) StartSpan(
operationName string,
sso ...opentracing.StartSpanOption,
) opentracing.Span {
return newSpan(operationName, tracer, sso)
}
func (tracer *tracerImpl) Inject(sc opentracing.SpanContext, format interface{}, carrier interface{}) error {
if tracer.opts.MetaEventReportingEnabled {
opentracing.StartSpan(LSMetaEvent_InjectOperation,
opentracing.Tag{Key: LSMetaEvent_MetaEventKey, Value: true},
opentracing.Tag{Key: LSMetaEvent_TraceIdKey, Value: sc.(SpanContext).TraceID},
opentracing.Tag{Key: LSMetaEvent_SpanIdKey, Value: sc.(SpanContext).SpanID},
opentracing.Tag{Key: LSMetaEvent_PropagationFormatKey, Value: format}).
Finish()
}
builtin, ok := format.(opentracing.BuiltinFormat)
if !ok {
return opentracing.ErrUnsupportedFormat
}
return tracer.propagators[builtin].Inject(sc, carrier)
}
func (tracer *tracerImpl) Extract(format interface{}, carrier interface{}) (opentracing.SpanContext, error) {
if tracer.opts.MetaEventReportingEnabled {
opentracing.StartSpan(LSMetaEvent_ExtractOperation,
opentracing.Tag{Key: LSMetaEvent_MetaEventKey, Value: true},
opentracing.Tag{Key: LSMetaEvent_PropagationFormatKey, Value: format}).
Finish()
}
builtin, ok := format.(opentracing.BuiltinFormat)
if !ok {
return nil, opentracing.ErrUnsupportedFormat
}
return tracer.propagators[builtin].Extract(carrier)
}
func (tracer *tracerImpl) reconnectClient(now time.Time) {
conn, err := tracer.client.ConnectClient()
if err != nil {
emitEvent(newEventConnectionError(err))
} else {
tracer.lock.Lock()
oldConn := tracer.connection
tracer.connection = conn
tracer.lock.Unlock()
oldConn.Close()
}
}
// Close flushes and then terminates the LightStep collector. Close may only be
// called once; subsequent calls to Close are no-ops.
func (tracer *tracerImpl) Close(ctx context.Context) {
tracer.closeOnce.Do(func() {
// notify report loop that we are closing
close(tracer.closeReportLoopChannel)
select {
case <-tracer.reportLoopClosedChannel:
tracer.Flush(ctx)
case <-ctx.Done():
return
}
// now its safe to close the connection
tracer.lock.Lock()
conn := tracer.connection
tracer.connection = nil
tracer.lock.Unlock()
if conn != nil {
err := conn.Close()
if err != nil {
emitEvent(newEventConnectionError(err))
}
}
})
}
// RecordSpan records a finished Span.
func (tracer *tracerImpl) RecordSpan(raw RawSpan) {
tracer.lock.Lock()
// Early-out for disabled runtimes
if tracer.disabled {
tracer.lock.Unlock()
return
}
tracer.buffer.addSpan(raw)
tracer.lock.Unlock()
if tracer.opts.Recorder != nil {
tracer.opts.Recorder.RecordSpan(raw)
}
}
// Flush sends all buffered data to the collector.
func (tracer *tracerImpl) Flush(ctx context.Context) {
tracer.flushingLock.Lock()
defer tracer.flushingLock.Unlock()
if errorEvent := tracer.preFlush(); errorEvent != nil {
emitEvent(errorEvent)
return
}
if tracer.opts.MetaEventReportingEnabled && !tracer.firstReportHasRun {
opentracing.StartSpan(LSMetaEvent_TracerCreateOperation,
opentracing.Tag{Key: LSMetaEvent_MetaEventKey, Value: true},
opentracing.Tag{Key: LSMetaEvent_TracerGuidKey, Value: tracer.reporterID}).
Finish()
tracer.firstReportHasRun = true
}
ctx, cancel := context.WithTimeout(ctx, tracer.opts.ReportTimeout)
defer cancel()
protoReq := tracer.converter.toReportRequest(
tracer.reporterID,
tracer.attributes,
tracer.accessToken,
&tracer.flushing,
)
req, err := tracer.client.Translate(protoReq)
if err != nil {
errorEvent := newEventFlushError(err, FlushErrorTranslate)
emitEvent(errorEvent)
// call postflush to prevent the tracer from going into an invalid state.
emitEvent(tracer.postFlush(errorEvent))
return
}
var reportErrorEvent *eventFlushError
resp, err := tracer.client.Report(ctx, req)
if err != nil {
reportErrorEvent = newEventFlushError(err, FlushErrorTransport)
} else if len(resp.GetErrors()) > 0 {
reportErrorEvent = newEventFlushError(fmt.Errorf(resp.GetErrors()[0]), FlushErrorReport)
}
if reportErrorEvent != nil {
emitEvent(reportErrorEvent)
}
emitEvent(tracer.postFlush(reportErrorEvent))
if err == nil && resp.DevMode() {
tracer.metaEventReportingEnabled = true
}
if err == nil && !resp.DevMode() {
tracer.metaEventReportingEnabled = false
}
if err == nil && resp.Disable() {
tracer.Disable()
}
}
// preFlush handles lock-protected data manipulation before flushing
func (tracer *tracerImpl) preFlush() *eventFlushError {
tracer.lock.Lock()
defer tracer.lock.Unlock()
if tracer.disabled {
return newEventFlushError(errFlushFailedTracerClosed, FlushErrorTracerDisabled)
}
if tracer.connection == nil {
return newEventFlushError(errFlushFailedTracerClosed, FlushErrorTracerClosed)
}
now := time.Now()
tracer.buffer, tracer.flushing = tracer.flushing, tracer.buffer
tracer.reportInFlight = true
tracer.flushing.setFlushing(now)
tracer.buffer.setCurrent(now)
tracer.lastReportAttempt = now
return nil
}
// postFlush handles lock-protected data manipulation after flushing
func (tracer *tracerImpl) postFlush(flushEventError *eventFlushError) *eventStatusReport {
tracer.lock.Lock()
defer tracer.lock.Unlock()
tracer.reportInFlight = false
statusReportEvent := newEventStatusReport(
tracer.flushing.reportStart,
tracer.flushing.reportEnd,
len(tracer.flushing.rawSpans),
int(tracer.flushing.droppedSpanCount+tracer.buffer.droppedSpanCount),
int(tracer.flushing.logEncoderErrorCount+tracer.buffer.logEncoderErrorCount),
)
if flushEventError == nil {
tracer.flushing.clear()
return statusReportEvent
}
switch flushEventError.State() {
case FlushErrorTranslate:
// When there's a translation error, we do not want to retry.
tracer.flushing.clear()
default:
// Restore the records that did not get sent correctly
tracer.buffer.mergeFrom(&tracer.flushing)
}
statusReportEvent.SetSentSpans(0)
return statusReportEvent
}
func (tracer *tracerImpl) Disable() {
tracer.lock.Lock()
if tracer.disabled {
tracer.lock.Unlock()
return
}
tracer.disabled = true
tracer.buffer.clear()
tracer.lock.Unlock()
emitEvent(newEventTracerDisabled())
}
// Every MinReportingPeriod the reporting loop wakes up and checks to see if
// either (a) the Runtime's max reporting period is about to expire (see
// maxReportingPeriod()), (b) the number of buffered log records is
// approaching kMaxBufferedLogs, or if (c) the number of buffered span records
// is approaching kMaxBufferedSpans. If any of those conditions are true,
// pending data is flushed to the remote peer. If not, the reporting loop waits
// until the next cycle. See Runtime.maybeFlush() for details.
//
// This could alternatively be implemented using flush channels and so forth,
// but that would introduce opportunities for client code to block on the
// runtime library, and we want to avoid that at all costs (even dropping data,
// which can certainly happen with high data rates and/or unresponsive remote
// peers).
func (tracer *tracerImpl) shouldFlushLocked(now time.Time) bool {
if now.Add(tracer.opts.MinReportingPeriod).Sub(tracer.lastReportAttempt) > tracer.opts.ReportingPeriod {
return true
} else if tracer.buffer.isHalfFull() {
return true
}
return false
}
func (tracer *tracerImpl) reportLoop() {
tickerChan := time.Tick(tracer.opts.MinReportingPeriod)
for {
select {
case <-tickerChan:
now := time.Now()
tracer.lock.Lock()
disabled := tracer.disabled
reconnect := !tracer.reportInFlight && tracer.client.ShouldReconnect()
shouldFlush := tracer.shouldFlushLocked(now)
tracer.lock.Unlock()
if disabled {
return
}
if shouldFlush {
tracer.Flush(context.Background())
}
if reconnect {
tracer.reconnectClient(now)
}
case <-tracer.closeReportLoopChannel:
close(tracer.reportLoopClosedChannel)
return
}
}
}