observability

package
v1.31.4 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 16, 2026 License: MIT Imports: 22 Imported by: 0

Documentation

Overview

Package observability provides OpenTelemetry tracing and Prometheus metrics.

This package combines the production-tested foundation from legacy Hector with GenAI semantic conventions from adk-go for ecosystem compatibility.

Architecture

The observability system has three main components:

  1. Tracing: OpenTelemetry spans with OTLP export
  2. Metrics: Prometheus counters and histograms
  3. Debug: In-memory span capture for web UI inspection

Configuration

Configure observability in your hector.yaml:

server:
  observability:
    tracing:
      enabled: true
      exporter: otlp
      endpoint: localhost:4317
      sampling_rate: 1.0
      service_name: my-agent
    metrics:
      enabled: true
      endpoint: /metrics

Index

Constants

View Source
// Attribute keys that describe the service itself (name, version, instance).
const (
	// AttrServiceName is the logical name of the service.
	AttrServiceName = "service.name"

	// AttrServiceVersion is the version of the service.
	AttrServiceVersion = "service.version"

	// AttrServiceInstance is the instance ID of the service.
	AttrServiceInstance = "service.instance.id"
)
View Source
// Attribute keys for GenAI operations, all under the "gen_ai." prefix. The
// package overview states that these follow the GenAI semantic conventions
// taken from adk-go.
const (
	// AttrGenAISystem identifies the GenAI system (e.g., "hector", "openai").
	AttrGenAISystem = "gen_ai.system"

	// AttrGenAIOperationName is the operation being performed.
	// Values: "chat", "text_completion", "embeddings"
	// NOTE(review): this package also declares OpToolCall ("execute_tool")
	// alongside constants for the three values above — confirm whether it
	// belongs in this list.
	AttrGenAIOperationName = "gen_ai.operation.name"

	// AttrGenAIRequestModel is the name of the model being used.
	AttrGenAIRequestModel = "gen_ai.request.model"

	// AttrGenAIRequestTemperature is the temperature parameter.
	AttrGenAIRequestTemperature = "gen_ai.request.temperature"

	// AttrGenAIRequestTopP is the top_p parameter.
	AttrGenAIRequestTopP = "gen_ai.request.top_p"

	// AttrGenAIRequestMaxTokens is the maximum tokens requested.
	AttrGenAIRequestMaxTokens = "gen_ai.request.max_tokens"

	// AttrGenAIResponseFinishReason is why generation stopped.
	// Values: "stop", "length", "tool_calls", "content_filter"
	// NOTE(review): the upstream OpenTelemetry GenAI conventions appear to
	// spell this key "gen_ai.response.finish_reasons" (plural) — confirm the
	// singular form used here is intentional.
	AttrGenAIResponseFinishReason = "gen_ai.response.finish_reason"

	// AttrGenAIUsageInputTokens is the number of input tokens.
	AttrGenAIUsageInputTokens = "gen_ai.usage.input_tokens"

	// AttrGenAIUsageOutputTokens is the number of output tokens.
	AttrGenAIUsageOutputTokens = "gen_ai.usage.output_tokens"

	// AttrGenAIToolName is the name of the tool being called.
	AttrGenAIToolName = "gen_ai.tool.name"

	// AttrGenAIToolDescription is the description of the tool.
	AttrGenAIToolDescription = "gen_ai.tool.description"

	// AttrGenAIToolCallID is the unique ID of the tool call.
	AttrGenAIToolCallID = "gen_ai.tool.call.id"
)
View Source
// Hector-specific attribute keys, all under the "hector." prefix: agent and
// session identity, optional serialized payloads, and thinking-block counters.
const (
	// AttrHectorAgentName is the name of the agent.
	AttrHectorAgentName = "hector.agent.name"

	// AttrHectorAgentType is the type of agent (llm, workflow, remote).
	AttrHectorAgentType = "hector.agent.type"

	// AttrHectorInvocationID is the unique ID for this agent invocation.
	AttrHectorInvocationID = "hector.invocation_id"

	// AttrHectorSessionID is the session ID.
	AttrHectorSessionID = "hector.session_id"

	// AttrHectorUserID is the user ID.
	AttrHectorUserID = "hector.user_id"

	// AttrHectorEventID is the event ID within a session.
	// DebugExporter.GetByEventID is documented as looking spans up by this
	// attribute.
	AttrHectorEventID = "hector.event_id"

	// AttrHectorLLMRequest is the serialized LLM request (optional, for debugging).
	AttrHectorLLMRequest = "hector.llm.request"

	// AttrHectorLLMResponse is the serialized LLM response (optional, for debugging).
	AttrHectorLLMResponse = "hector.llm.response"

	// AttrHectorToolArgs is the serialized tool arguments (optional, for debugging).
	AttrHectorToolArgs = "hector.tool.args"

	// AttrHectorToolResponse is the serialized tool response (optional, for debugging).
	AttrHectorToolResponse = "hector.tool.response"

	// AttrHectorThinkingBlocks is the number of thinking blocks in response.
	AttrHectorThinkingBlocks = "hector.llm.thinking.blocks"

	// AttrHectorThinkingLength is the total length of thinking content (chars).
	AttrHectorThinkingLength = "hector.llm.thinking.length"
)
View Source
// Attribute keys for HTTP requests.
//
// NOTE(review): "http.method" and "http.status_code" look like the older
// OpenTelemetry HTTP key names, while "http.request.body.size" and
// "http.response.body.size" look like the newer ones — confirm the mix is
// intentional before relying on either convention.
const (
	// AttrHTTPMethod is the HTTP method.
	AttrHTTPMethod = "http.method"

	// AttrHTTPPath is the HTTP path (route pattern, not raw path).
	// Despite "Path" in the identifier, the key emitted is "http.route".
	AttrHTTPPath = "http.route"

	// AttrHTTPStatusCode is the HTTP response status code.
	AttrHTTPStatusCode = "http.status_code"

	// AttrHTTPRequestSize is the request body size in bytes.
	AttrHTTPRequestSize = "http.request.body.size"

	// AttrHTTPResponseSize is the response body size in bytes.
	AttrHTTPResponseSize = "http.response.body.size"
)
View Source
// Attribute keys for describing an error.
const (
	// AttrErrorType is the type of error that occurred.
	AttrErrorType = "error.type"

	// AttrErrorMessage is the error message.
	AttrErrorMessage = "error.message"
)
View Source
// Attribute keys for RAG search and indexing, all under the "hector.rag."
// prefix.
const (
	// AttrRAGStoreName is the name of the document store.
	AttrRAGStoreName = "hector.rag.store.name"

	// AttrRAGQuery is the search query.
	AttrRAGQuery = "hector.rag.query"

	// AttrRAGResultCount is the number of search results.
	AttrRAGResultCount = "hector.rag.result_count"

	// AttrRAGTopK is the requested number of results.
	AttrRAGTopK = "hector.rag.top_k"

	// AttrRAGDocumentCount is the number of documents indexed.
	AttrRAGDocumentCount = "hector.rag.document_count"

	// AttrRAGChunkCount is the number of chunks indexed.
	AttrRAGChunkCount = "hector.rag.chunk_count"

	// AttrRAGSourceType is the data source type (directory, sql, api).
	AttrRAGSourceType = "hector.rag.source_type"

	// AttrRAGChunkerStrategy is the chunking strategy used.
	AttrRAGChunkerStrategy = "hector.rag.chunker_strategy"

	// AttrRAGHyDEEnabled indicates if HyDE (hypothetical document
	// generation) was used.
	AttrRAGHyDEEnabled = "hector.rag.hyde_enabled"

	// AttrRAGRerankEnabled indicates if reranking was used.
	AttrRAGRerankEnabled = "hector.rag.rerank_enabled"

	// AttrRAGMultiQueryEnabled indicates if multi-query was used.
	AttrRAGMultiQueryEnabled = "hector.rag.multiquery_enabled"

	// AttrRAGEmbeddingModel is the embedding model used.
	AttrRAGEmbeddingModel = "hector.rag.embedding_model"

	// AttrRAGVectorProvider is the vector database provider.
	AttrRAGVectorProvider = "hector.rag.vector_provider"
)
View Source
// Span names. Every name is prefixed with "hector." followed by the area
// (agent, llm, tool, memory, session, http, rag) and the operation.
const (
	// SpanAgentRun is the top-level span for an agent invocation.
	SpanAgentRun = "hector.agent.run"

	// SpanLLMCall is a span for an LLM API call.
	SpanLLMCall = "hector.llm.call"

	// SpanToolExecution is a span for tool execution.
	SpanToolExecution = "hector.tool.execute"

	// SpanMemorySearch is a span for memory/index search.
	SpanMemorySearch = "hector.memory.search"

	// SpanSessionLoad is a span for loading session data.
	SpanSessionLoad = "hector.session.load"

	// SpanHTTPRequest is a span for HTTP request handling.
	SpanHTTPRequest = "hector.http.request"

	// SpanRAGSearch is a span for RAG search operations.
	SpanRAGSearch = "hector.rag.search"

	// SpanRAGIndex is a span for RAG indexing operations.
	SpanRAGIndex = "hector.rag.index"

	// SpanRAGEmbed is a span for embedding generation.
	SpanRAGEmbed = "hector.rag.embed"

	// SpanRAGChunk is a span for document chunking.
	SpanRAGChunk = "hector.rag.chunk"

	// SpanRAGRerank is a span for result reranking.
	SpanRAGRerank = "hector.rag.rerank"

	// SpanRAGHyDE is a span for HyDE hypothetical document generation.
	SpanRAGHyDE = "hector.rag.hyde"
)
View Source
// Default configuration values. They match the defaults documented on the
// TracingConfig and MetricsConfig fields.
const (
	// DefaultServiceName is the default service name for tracing.
	DefaultServiceName = "hector"

	// DefaultSamplingRate is the default trace sampling rate.
	// TracingConfig documents the range as 0.0 (none) to 1.0 (all), so this
	// default samples every trace.
	DefaultSamplingRate = 1.0

	// DefaultOTLPEndpoint is the default OTLP endpoint.
	// TracingConfig.Endpoint documents port 4317 as the gRPC endpoint.
	DefaultOTLPEndpoint = "localhost:4317"

	// DefaultMetricsPath is the default Prometheus metrics endpoint.
	DefaultMetricsPath = "/metrics"
)
View Source
// Operation names. OpChat, OpTextCompletion and OpEmbeddings match the values
// documented for AttrGenAIOperationName; presumably OpToolCall is used with
// the same attribute — TODO confirm.
const (
	// OpChat is a chat completion operation.
	OpChat = "chat"

	// OpTextCompletion is a text completion operation.
	OpTextCompletion = "text_completion"

	// OpEmbeddings is an embeddings generation operation.
	OpEmbeddings = "embeddings"

	// OpToolCall is a tool execution operation.
	// Note that the value is "execute_tool", not "tool_call".
	OpToolCall = "execute_tool"
)

Variables

This section is empty.

Functions

func HTTPMiddleware

func HTTPMiddleware(tracer *Tracer, metrics *Metrics) func(http.Handler) http.Handler

HTTPMiddleware creates HTTP middleware that records both traces and metrics.

func MetricsMiddleware

func MetricsMiddleware(metrics *Metrics) func(http.Handler) http.Handler

MetricsMiddleware creates HTTP middleware that only records metrics.

func TracingMiddleware

func TracingMiddleware(tracer *Tracer) func(http.Handler) http.Handler

TracingMiddleware creates HTTP middleware that only records traces.

Types

type Config

// Config configures the observability system. It groups the tracing and
// metrics settings, which are read from the "tracing" and "metrics" YAML keys.
type Config struct {
	// Tracing configures OpenTelemetry distributed tracing.
	Tracing TracingConfig `yaml:"tracing,omitempty"`

	// Metrics configures Prometheus metrics collection.
	Metrics MetricsConfig `yaml:"metrics,omitempty"`
}

Config configures the observability system.

func (*Config) SetDefaults

func (c *Config) SetDefaults()

SetDefaults applies default values to Config.

func (*Config) Validate

func (c *Config) Validate() error

Validate checks the Config for errors.

type DebugExporter

type DebugExporter struct {
	// contains filtered or unexported fields
}

DebugExporter is a custom SpanExporter that stores span data in memory for web UI inspection and debugging.

Inspired by adk-go's APIServerSpanExporter, this exporter captures relevant span attributes (LLM calls, tool executions, agent runs) keyed by event ID for easy lookup.

Thread-safe for concurrent reads and writes.

func NewDebugExporter

func NewDebugExporter() *DebugExporter

NewDebugExporter creates a new DebugExporter.

func (*DebugExporter) Clear

func (e *DebugExporter) Clear()

Clear removes all captured spans.

func (*DebugExporter) Count

func (e *DebugExporter) Count() int

Count returns the number of captured spans.

func (*DebugExporter) ExportSpans

func (e *DebugExporter) ExportSpans(ctx context.Context, spans []sdktrace.ReadOnlySpan) error

ExportSpans implements sdktrace.SpanExporter. It captures span data for relevant spans (LLM calls, tool executions, etc.).

func (*DebugExporter) GetAllSpans

func (e *DebugExporter) GetAllSpans() []*DebugSpan

GetAllSpans returns all captured spans.

func (*DebugExporter) GetByEventID

func (e *DebugExporter) GetByEventID(eventID string) *DebugSpan

GetByEventID returns a span by its hector.event_id attribute.

func (*DebugExporter) GetSpan

func (e *DebugExporter) GetSpan(spanID string) *DebugSpan

GetSpan returns a span by its span ID.

func (*DebugExporter) GetSpansByName

func (e *DebugExporter) GetSpansByName(name string) []*DebugSpan

GetSpansByName returns all spans with the given name.

func (*DebugExporter) GetSpansByTrace

func (e *DebugExporter) GetSpansByTrace(traceID string) []*DebugSpan

GetSpansByTrace returns all spans for a given trace ID.

func (*DebugExporter) Shutdown

func (e *DebugExporter) Shutdown(ctx context.Context) error

Shutdown implements sdktrace.SpanExporter.

func (*DebugExporter) WithMaxSize

func (e *DebugExporter) WithMaxSize(size int) *DebugExporter

WithMaxSize sets the maximum number of spans to retain.

type DebugSpan

// DebugSpan contains captured span information for debugging.
//
// All identifiers are stored as strings and attributes as a string-to-string
// map. Per the JSON tags, StartTime and EndTime are serialized as Unix
// nanoseconds and DurationMs as "duration_ms". ParentSpanID, Events and
// StatusMsg are omitted from the JSON output when empty.
type DebugSpan struct {
	TraceID      string            `json:"trace_id"`
	SpanID       string            `json:"span_id"`
	ParentSpanID string            `json:"parent_span_id,omitempty"`
	Name         string            `json:"name"`
	StartTime    int64             `json:"start_time_unix_nano"`
	EndTime      int64             `json:"end_time_unix_nano"`
	DurationMs   float64           `json:"duration_ms"`
	Attributes   map[string]string `json:"attributes"`
	Events       []SpanEvent       `json:"events,omitempty"`
	Status       string            `json:"status"`
	StatusMsg    string            `json:"status_message,omitempty"`
}

DebugSpan contains captured span information for debugging.

type Manager

type Manager struct {
	// contains filtered or unexported fields
}

Manager manages the lifecycle of all observability components. It provides a unified interface for initializing, accessing, and shutting down tracing and metrics systems.

func MustNewManager

func MustNewManager(ctx context.Context, cfg *Config) *Manager

MustNewManager creates a Manager and panics on error. Useful for initialization in main() when errors are fatal.

func NewFromConfig

func NewFromConfig(ctx context.Context, cfg *Config) (*Manager, error)

NewFromConfig creates a Manager with defaults from a configuration pointer. This is useful when the config might be nil.

func NewManager

func NewManager(ctx context.Context, cfg *Config) (*Manager, error)

NewManager creates a new observability Manager from configuration.

func NoopManager

func NoopManager() *Manager

NoopManager returns a no-operation Manager that does nothing. Use this when observability is completely disabled.

func (*Manager) DebugExporter

func (m *Manager) DebugExporter() *DebugExporter

DebugExporter returns the debug span exporter, or nil if not enabled.

func (*Manager) Metrics

func (m *Manager) Metrics() *Metrics

Metrics returns the metrics instance, or nil if metrics are disabled.

func (*Manager) MetricsEnabled

func (m *Manager) MetricsEnabled() bool

MetricsEnabled returns whether metrics are enabled.

func (*Manager) MetricsEndpoint

func (m *Manager) MetricsEndpoint() string

MetricsEndpoint returns the configured metrics endpoint path.

func (*Manager) MetricsHandler

func (m *Manager) MetricsHandler() http.Handler

MetricsHandler returns an HTTP handler for the metrics endpoint.

func (*Manager) Shutdown

func (m *Manager) Shutdown(ctx context.Context) error

Shutdown gracefully shuts down all observability components.

func (*Manager) Tracer

func (m *Manager) Tracer() *Tracer

Tracer returns the tracer instance, or nil if tracing is disabled.

func (*Manager) TracingEnabled

func (m *Manager) TracingEnabled() bool

TracingEnabled returns whether tracing is enabled.

type Metrics

type Metrics struct {
	// contains filtered or unexported fields
}

Metrics provides Prometheus metrics collection for Hector.

func NewMetrics

func NewMetrics(cfg *MetricsConfig) (*Metrics, error)

NewMetrics creates a new Metrics instance from configuration.

func (*Metrics) DecAgentActiveRuns

func (m *Metrics) DecAgentActiveRuns(agentName string)

DecAgentActiveRuns decrements the active runs counter.

func (*Metrics) Handler

func (m *Metrics) Handler() http.Handler

Handler returns an HTTP handler for the Prometheus metrics endpoint.

func (*Metrics) IncAgentActiveRuns

func (m *Metrics) IncAgentActiveRuns(agentName string)

IncAgentActiveRuns increments the active runs counter.

func (*Metrics) RecordAgentCall

func (m *Metrics) RecordAgentCall(agentName, agentType string, duration time.Duration)

RecordAgentCall records an agent invocation.

func (*Metrics) RecordAgentError

func (m *Metrics) RecordAgentError(agentName, agentType, errorType string)

RecordAgentError records an agent error.

func (*Metrics) RecordAppLoad added in v1.21.0

func (m *Metrics) RecordAppLoad(appName, status string, duration time.Duration)

func (*Metrics) RecordAppUnload added in v1.21.0

func (m *Metrics) RecordAppUnload(appName string)

func (*Metrics) RecordHTTPRequest

func (m *Metrics) RecordHTTPRequest(method, path string, statusCode int, duration time.Duration, reqSize, respSize int64)

RecordHTTPRequest records an HTTP request.

func (*Metrics) RecordLLMCall

func (m *Metrics) RecordLLMCall(model, provider string, duration time.Duration)

RecordLLMCall records an LLM API call.

func (*Metrics) RecordLLMError

func (m *Metrics) RecordLLMError(model, provider, errorType string)

RecordLLMError records an LLM error.

func (*Metrics) RecordLLMTokens

func (m *Metrics) RecordLLMTokens(model, provider string, inputTokens, outputTokens int)

RecordLLMTokens records token usage.

func (*Metrics) RecordMemoryIndexed

func (m *Metrics) RecordMemoryIndexed(indexType string, count int)

RecordMemoryIndexed records items being indexed.

func (*Metrics) RecordMemorySearch

func (m *Metrics) RecordMemorySearch(indexType string, duration time.Duration)

RecordMemorySearch records a memory search operation.

func (*Metrics) RecordNotificationRetry added in v1.21.0

func (m *Metrics) RecordNotificationRetry(agentName, notifType string)

func (*Metrics) RecordNotificationSent added in v1.21.0

func (m *Metrics) RecordNotificationSent(agentName, notifType, status string, duration time.Duration)

func (*Metrics) RecordQueueLatency added in v1.21.0

func (m *Metrics) RecordQueueLatency(appName, agentName string, duration time.Duration)

func (*Metrics) RecordQueueProcessingDuration added in v1.21.0

func (m *Metrics) RecordQueueProcessingDuration(appName, agentName string, duration time.Duration)

func (*Metrics) RecordRAGDocError

func (m *Metrics) RecordRAGDocError(storeName string)

RecordRAGDocError records a document indexing error.

func (*Metrics) RecordRAGDocIndexed

func (m *Metrics) RecordRAGDocIndexed(storeName string, duration time.Duration)

RecordRAGDocIndexed records a document being indexed.

func (*Metrics) RecordRAGDocSkipped

func (m *Metrics) RecordRAGDocSkipped(storeName string)

RecordRAGDocSkipped records a document being skipped.

func (*Metrics) RecordRAGSearch

func (m *Metrics) RecordRAGSearch(storeName string, duration time.Duration, resultCount int)

RecordRAGSearch records a RAG search operation.

func (*Metrics) RecordSchedulerError added in v1.21.0

func (m *Metrics) RecordSchedulerError(appName, agentName, errorType string)

func (*Metrics) RecordSchedulerTrigger added in v1.21.0

func (m *Metrics) RecordSchedulerTrigger(appName, agentName string)

func (*Metrics) RecordSessionCreated

func (m *Metrics) RecordSessionCreated(appName string)

RecordSessionCreated records a session creation.

func (*Metrics) RecordSessionEvent

func (m *Metrics) RecordSessionEvent(appName, eventType string)

RecordSessionEvent records a session event.

func (*Metrics) RecordToolCall

func (m *Metrics) RecordToolCall(toolName string, duration time.Duration)

RecordToolCall records a tool invocation.

func (*Metrics) RecordToolError

func (m *Metrics) RecordToolError(toolName, errorType string)

RecordToolError records a tool error.

func (*Metrics) Registry

func (m *Metrics) Registry() *prometheus.Registry

Registry returns the Prometheus registry.

func (*Metrics) SetQueueDepth added in v1.21.0

func (m *Metrics) SetQueueDepth(appName, status string, count float64)

func (*Metrics) SetSessionsActive

func (m *Metrics) SetSessionsActive(appName string, count int)

SetSessionsActive sets the number of active sessions.

type MetricsConfig

// MetricsConfig configures Prometheus metrics. Every field is optional in
// YAML (all tags carry omitempty).
type MetricsConfig struct {
	// Enabled turns on metrics collection.
	// Default: false
	Enabled bool `yaml:"enabled,omitempty"`

	// Endpoint is the path to expose metrics on.
	// Default: "/metrics"
	Endpoint string `yaml:"endpoint,omitempty"`

	// Namespace prefixes all metric names.
	// Default: "hector"
	Namespace string `yaml:"namespace,omitempty"`

	// Subsystem is added between namespace and metric name.
	// Example: With namespace="hector" and subsystem="agent":
	//          metric name becomes "hector_agent_calls_total"
	// No default is documented for this field.
	Subsystem string `yaml:"subsystem,omitempty"`

	// ConstLabels are labels added to all metrics.
	ConstLabels map[string]string `yaml:"const_labels,omitempty"`
}

MetricsConfig configures Prometheus metrics.

func (*MetricsConfig) SetDefaults

func (c *MetricsConfig) SetDefaults()

SetDefaults applies default values to MetricsConfig.

func (*MetricsConfig) Validate

func (c *MetricsConfig) Validate() error

Validate checks MetricsConfig for errors.

type NoopMetrics

// NoopMetrics is a metrics implementation that does nothing. Its documented
// method set covers every method of Recorder, plus Handler. It does not
// include the methods marked "added in v1.21.0" on *Metrics.
type NoopMetrics struct{}

NoopMetrics is a metrics implementation that does nothing.

func (NoopMetrics) DecAgentActiveRuns

func (NoopMetrics) DecAgentActiveRuns(_ string)

func (NoopMetrics) Handler

func (NoopMetrics) Handler() http.Handler

Handler returns a handler that returns 503 Service Unavailable.

func (NoopMetrics) IncAgentActiveRuns

func (NoopMetrics) IncAgentActiveRuns(_ string)

func (NoopMetrics) RecordAgentCall

func (NoopMetrics) RecordAgentCall(_, _ string, _ time.Duration)

RecordAgentCall is a no-op, as are the other agent metrics methods.

func (NoopMetrics) RecordAgentError

func (NoopMetrics) RecordAgentError(_, _, _ string)

func (NoopMetrics) RecordHTTPRequest

func (NoopMetrics) RecordHTTPRequest(_, _ string, _ int, _ time.Duration, _, _ int64)

RecordHTTPRequest is a no-op.

func (NoopMetrics) RecordLLMCall

func (NoopMetrics) RecordLLMCall(_, _ string, _ time.Duration)

RecordLLMCall is a no-op, as are the other LLM metrics methods.

func (NoopMetrics) RecordLLMError

func (NoopMetrics) RecordLLMError(_, _, _ string)

func (NoopMetrics) RecordLLMTokens

func (NoopMetrics) RecordLLMTokens(_, _ string, _, _ int)

func (NoopMetrics) RecordMemoryIndexed

func (NoopMetrics) RecordMemoryIndexed(_ string, _ int)

func (NoopMetrics) RecordMemorySearch

func (NoopMetrics) RecordMemorySearch(_ string, _ time.Duration)

RecordMemorySearch is a no-op, as are the other memory metrics methods.

func (NoopMetrics) RecordRAGDocError

func (NoopMetrics) RecordRAGDocError(_ string)

func (NoopMetrics) RecordRAGDocIndexed

func (NoopMetrics) RecordRAGDocIndexed(_ string, _ time.Duration)

RecordRAGDocIndexed is a no-op, as are the other RAG metrics methods.

func (NoopMetrics) RecordRAGDocSkipped

func (NoopMetrics) RecordRAGDocSkipped(_ string)

func (NoopMetrics) RecordRAGSearch

func (NoopMetrics) RecordRAGSearch(_ string, _ time.Duration, _ int)

func (NoopMetrics) RecordSessionCreated

func (NoopMetrics) RecordSessionCreated(_ string)

RecordSessionCreated is a no-op, as are the other session metrics methods.

func (NoopMetrics) RecordSessionEvent

func (NoopMetrics) RecordSessionEvent(_, _ string)

func (NoopMetrics) RecordToolCall

func (NoopMetrics) RecordToolCall(_ string, _ time.Duration)

RecordToolCall is a no-op, as are the other tool metrics methods.

func (NoopMetrics) RecordToolError

func (NoopMetrics) RecordToolError(_, _ string)

func (NoopMetrics) SetSessionsActive

func (NoopMetrics) SetSessionsActive(_ string, _ int)

type NoopTracer

// NoopTracer is a tracer whose methods do nothing: the Start* methods are
// documented as returning no-op spans and the Add*/RecordError/Shutdown
// methods as no-ops. Its documented method set does not include the RAG
// helpers (StartRAG*, AddRAG*) that *Tracer provides.
type NoopTracer struct{}

NoopTracer is a no-operation Tracer.

func (NoopTracer) AddLLMFinishReason

func (NoopTracer) AddLLMFinishReason(_ trace.Span, _ string)

AddLLMFinishReason is a no-op.

func (NoopTracer) AddLLMUsage

func (NoopTracer) AddLLMUsage(_ trace.Span, _, _ int)

AddLLMUsage is a no-op.

func (NoopTracer) AddPayload

func (NoopTracer) AddPayload(_ trace.Span, _, _ string)

AddPayload is a no-op.

func (NoopTracer) AddToolPayload

func (NoopTracer) AddToolPayload(_ trace.Span, _, _ string)

AddToolPayload is a no-op.

func (NoopTracer) DebugExporter

func (NoopTracer) DebugExporter() *DebugExporter

DebugExporter returns nil.

func (NoopTracer) RecordError

func (NoopTracer) RecordError(_ trace.Span, _ error)

RecordError is a no-op.

func (NoopTracer) Shutdown

func (NoopTracer) Shutdown(_ context.Context) error

Shutdown is a no-op.

func (NoopTracer) Start

Start returns a no-op span.

func (NoopTracer) StartAgentRun

func (NoopTracer) StartAgentRun(ctx context.Context, _, _, _, _, _ string) (context.Context, trace.Span)

StartAgentRun returns a no-op span.

func (NoopTracer) StartLLMCall

func (NoopTracer) StartLLMCall(ctx context.Context, _ string, _ int, _, _ float64) (context.Context, trace.Span)

StartLLMCall returns a no-op span.

func (NoopTracer) StartMemorySearch

func (NoopTracer) StartMemorySearch(ctx context.Context, _ string, _ int) (context.Context, trace.Span)

StartMemorySearch returns a no-op span.

func (NoopTracer) StartToolExecution

func (NoopTracer) StartToolExecution(ctx context.Context, _, _, _ string) (context.Context, trace.Span)

StartToolExecution returns a no-op span.

type Recorder

// Recorder defines the interface for recording metrics. This allows for
// dependency injection and easier testing.
//
// The methods listed here are also documented on *Metrics and NoopMetrics.
// The *Metrics methods marked "added in v1.21.0" (app load/unload, queue,
// scheduler and notification metrics) are not part of this interface.
type Recorder interface {
	// Agent metrics
	RecordAgentCall(agentName, agentType string, duration time.Duration)
	RecordAgentError(agentName, agentType, errorType string)
	IncAgentActiveRuns(agentName string)
	DecAgentActiveRuns(agentName string)

	// LLM metrics
	RecordLLMCall(model, provider string, duration time.Duration)
	RecordLLMTokens(model, provider string, inputTokens, outputTokens int)
	RecordLLMError(model, provider, errorType string)

	// Tool metrics
	RecordToolCall(toolName string, duration time.Duration)
	RecordToolError(toolName, errorType string)

	// Memory metrics
	RecordMemorySearch(indexType string, duration time.Duration)
	RecordMemoryIndexed(indexType string, count int)

	// Session metrics
	RecordSessionCreated(appName string)
	SetSessionsActive(appName string, count int)
	RecordSessionEvent(appName, eventType string)

	// HTTP metrics
	// reqSize and respSize are presumably body sizes in bytes, matching
	// AttrHTTPRequestSize / AttrHTTPResponseSize — TODO confirm.
	RecordHTTPRequest(method, path string, statusCode int, duration time.Duration, reqSize, respSize int64)

	// RAG metrics
	RecordRAGDocIndexed(storeName string, duration time.Duration)
	RecordRAGDocSkipped(storeName string)
	RecordRAGDocError(storeName string)
	RecordRAGSearch(storeName string, duration time.Duration, resultCount int)
}

Recorder defines the interface for recording metrics. This allows for dependency injection and easier testing.

type SpanEvent

// SpanEvent represents an event recorded on a span. It is the element type of
// DebugSpan.Events. Per the JSON tags, TimeUnix is serialized as
// "time_unix_nano" and Attributes is omitted when empty.
type SpanEvent struct {
	Name       string            `json:"name"`
	TimeUnix   int64             `json:"time_unix_nano"`
	Attributes map[string]string `json:"attributes,omitempty"`
}

SpanEvent represents an event recorded on a span.

type Tracer

type Tracer struct {
	// contains filtered or unexported fields
}

Tracer wraps the OpenTelemetry tracer with Hector-specific helpers.

func NewTracer

func NewTracer(ctx context.Context, cfg *TracingConfig, opts ...TracerOption) (*Tracer, error)

NewTracer creates a new Tracer from configuration.

func (*Tracer) AddLLMFinishReason

func (t *Tracer) AddLLMFinishReason(span trace.Span, reason string)

AddLLMFinishReason adds the finish reason to a span.

func (*Tracer) AddLLMUsage

func (t *Tracer) AddLLMUsage(span trace.Span, inputTokens, outputTokens int)

AddLLMUsage adds token usage information to a span.

func (*Tracer) AddPayload

func (t *Tracer) AddPayload(span trace.Span, request, response string)

AddPayload adds serialized request/response to a span (if capture is enabled).

func (*Tracer) AddRAGIndexStats

func (t *Tracer) AddRAGIndexStats(span trace.Span, chunkCount int)

AddRAGIndexStats adds indexing statistics to a span.

func (*Tracer) AddRAGSearchResults

func (t *Tracer) AddRAGSearchResults(span trace.Span, resultCount int)

AddRAGSearchResults adds search result count to a span.

func (*Tracer) AddToolPayload

func (t *Tracer) AddToolPayload(span trace.Span, args, response string)

AddToolPayload adds serialized tool args/response to a span (if capture is enabled).

func (*Tracer) DebugExporter

func (t *Tracer) DebugExporter() *DebugExporter

DebugExporter returns the debug exporter if configured.

func (*Tracer) RecordError

func (t *Tracer) RecordError(span trace.Span, err error)

RecordError records an error on a span.

func (*Tracer) Shutdown

func (t *Tracer) Shutdown(ctx context.Context) error

Shutdown gracefully shuts down the tracer.

func (*Tracer) Start

func (t *Tracer) Start(ctx context.Context, spanName string, opts ...trace.SpanStartOption) (context.Context, trace.Span)

Start begins a new span with the given name.

func (*Tracer) StartAgentRun

func (t *Tracer) StartAgentRun(ctx context.Context, agentName, agentType, sessionID, userID, invocationID string) (context.Context, trace.Span)

StartAgentRun begins a span for an agent invocation.

func (*Tracer) StartLLMCall

func (t *Tracer) StartLLMCall(ctx context.Context, model string, maxTokens int, temperature, topP float64) (context.Context, trace.Span)

StartLLMCall begins a span for an LLM API call.

func (*Tracer) StartMemorySearch

func (t *Tracer) StartMemorySearch(ctx context.Context, query string, limit int) (context.Context, trace.Span)

StartMemorySearch begins a span for memory search operations.

func (*Tracer) StartRAGChunk

func (t *Tracer) StartRAGChunk(ctx context.Context, strategy string, documentSize int) (context.Context, trace.Span)

StartRAGChunk begins a span for document chunking.

func (*Tracer) StartRAGEmbed

func (t *Tracer) StartRAGEmbed(ctx context.Context, model string, textLength int) (context.Context, trace.Span)

StartRAGEmbed begins a span for embedding generation.

func (*Tracer) StartRAGHyDE

func (t *Tracer) StartRAGHyDE(ctx context.Context, query string) (context.Context, trace.Span)

StartRAGHyDE begins a span for HyDE hypothetical document generation.

func (*Tracer) StartRAGIndex

func (t *Tracer) StartRAGIndex(ctx context.Context, storeName, sourceType string, documentCount int) (context.Context, trace.Span)

StartRAGIndex begins a span for RAG indexing operations.

func (*Tracer) StartRAGRerank

func (t *Tracer) StartRAGRerank(ctx context.Context, inputCount int) (context.Context, trace.Span)

StartRAGRerank begins a span for result reranking.

func (*Tracer) StartRAGSearch

func (t *Tracer) StartRAGSearch(ctx context.Context, storeName, query string, topK int, hydeEnabled, rerankEnabled, multiQueryEnabled bool) (context.Context, trace.Span)

StartRAGSearch begins a span for RAG search operations.

func (*Tracer) StartToolExecution

func (t *Tracer) StartToolExecution(ctx context.Context, toolName, toolDescription, callID string) (context.Context, trace.Span)

StartToolExecution begins a span for tool execution.

type TracerOption

// TracerOption configures the Tracer. Options are passed to NewTracer as its
// variadic opts argument; WithCapturePayloads and WithDebugExporter return
// values of this type.
type TracerOption func(*Tracer)

TracerOption configures the Tracer.

func WithCapturePayloads

func WithCapturePayloads(capture bool) TracerOption

WithCapturePayloads enables capturing full LLM request/response in spans.

func WithDebugExporter

func WithDebugExporter(exporter *DebugExporter) TracerOption

WithDebugExporter adds a debug exporter for web UI inspection.

type TracingConfig

// TracingConfig configures OpenTelemetry tracing. Every field is optional in
// YAML (all tags carry omitempty).
type TracingConfig struct {
	// Enabled turns on distributed tracing.
	// Default: false
	Enabled bool `yaml:"enabled,omitempty"`

	// Exporter specifies the trace exporter type.
	// Values: "otlp" (default), "jaeger", "zipkin", "stdout"
	Exporter string `yaml:"exporter,omitempty"`

	// Endpoint is the collector endpoint.
	// For OTLP: "localhost:4317" (gRPC) or "localhost:4318" (HTTP)
	// For Jaeger: "http://localhost:14268/api/traces"
	// For Zipkin: "http://localhost:9411/api/v2/spans"
	Endpoint string `yaml:"endpoint,omitempty"`

	// SamplingRate controls what fraction of traces are sampled.
	// Range: 0.0 (none) to 1.0 (all)
	// Default: 1.0
	SamplingRate float64 `yaml:"sampling_rate,omitempty"`

	// ServiceName identifies this service in traces.
	// Default: "hector"
	ServiceName string `yaml:"service_name,omitempty"`

	// ServiceVersion is the version of this service.
	ServiceVersion string `yaml:"service_version,omitempty"`

	// Insecure disables TLS for the exporter connection.
	// Default: true (for local development)
	// Presumably a pointer so that an unset value can be told apart from an
	// explicit false; read it through IsInsecure — TODO confirm.
	Insecure *bool `yaml:"insecure,omitempty"`

	// Headers are additional headers to send with export requests.
	Headers map[string]string `yaml:"headers,omitempty"`

	// CapturePayloads enables capturing full LLM request/response in spans.
	// Warning: This can produce large spans. Use only for debugging.
	// Default: false
	CapturePayloads bool `yaml:"capture_payloads,omitempty"`

	// DebugExporter enables the in-memory span exporter for web UI.
	// Default: true (when tracing is enabled)
	// Presumably a pointer for the same unset-vs-false reason as Insecure;
	// read it through IsDebugExporterEnabled — TODO confirm.
	DebugExporter *bool `yaml:"debug_exporter,omitempty"`

	// Timeout for exporter operations.
	// Default: 10s
	Timeout time.Duration `yaml:"timeout,omitempty"`
}

TracingConfig configures OpenTelemetry tracing.

func (*TracingConfig) IsDebugExporterEnabled

func (c *TracingConfig) IsDebugExporterEnabled() bool

IsDebugExporterEnabled returns whether the debug exporter should be enabled.

func (*TracingConfig) IsInsecure

func (c *TracingConfig) IsInsecure() bool

IsInsecure returns whether to use insecure connection.

func (*TracingConfig) SetDefaults

func (c *TracingConfig) SetDefaults()

SetDefaults applies default values to TracingConfig.

func (*TracingConfig) Validate

func (c *TracingConfig) Validate() error

Validate checks TracingConfig for errors.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL