Documentation
¶
Overview ¶
Package observability provides OpenTelemetry tracing and Prometheus metrics.
This package combines the production-tested foundation from legacy Hector with GenAI semantic conventions from adk-go for ecosystem compatibility.
Architecture ¶
The observability system has three main components:
- Tracing: OpenTelemetry spans with OTLP export
- Metrics: Prometheus counters and histograms
- Debug: In-memory span capture for web UI inspection
Configuration ¶
Configure observability in your hector.yaml:
server:
  observability:
    tracing:
      enabled: true
      exporter: otlp
      endpoint: localhost:4317
      sampling_rate: 1.0
      service_name: my-agent
    metrics:
      enabled: true
      endpoint: /metrics
Index ¶
- Constants
- func HTTPMiddleware(tracer *Tracer, metrics *Metrics) func(http.Handler) http.Handler
- func MetricsMiddleware(metrics *Metrics) func(http.Handler) http.Handler
- func TracingMiddleware(tracer *Tracer) func(http.Handler) http.Handler
- type Config
- type DebugExporter
- func (e *DebugExporter) Clear()
- func (e *DebugExporter) Count() int
- func (e *DebugExporter) ExportSpans(ctx context.Context, spans []sdktrace.ReadOnlySpan) error
- func (e *DebugExporter) GetAllSpans() []*DebugSpan
- func (e *DebugExporter) GetByEventID(eventID string) *DebugSpan
- func (e *DebugExporter) GetSpan(spanID string) *DebugSpan
- func (e *DebugExporter) GetSpansByName(name string) []*DebugSpan
- func (e *DebugExporter) GetSpansByTrace(traceID string) []*DebugSpan
- func (e *DebugExporter) Shutdown(ctx context.Context) error
- func (e *DebugExporter) WithMaxSize(size int) *DebugExporter
- type DebugSpan
- type Manager
- func (m *Manager) DebugExporter() *DebugExporter
- func (m *Manager) Metrics() *Metrics
- func (m *Manager) MetricsEnabled() bool
- func (m *Manager) MetricsEndpoint() string
- func (m *Manager) MetricsHandler() http.Handler
- func (m *Manager) Shutdown(ctx context.Context) error
- func (m *Manager) Tracer() *Tracer
- func (m *Manager) TracingEnabled() bool
- type Metrics
- func (m *Metrics) DecAgentActiveRuns(agentName string)
- func (m *Metrics) Handler() http.Handler
- func (m *Metrics) IncAgentActiveRuns(agentName string)
- func (m *Metrics) RecordAgentCall(agentName, agentType string, duration time.Duration)
- func (m *Metrics) RecordAgentError(agentName, agentType, errorType string)
- func (m *Metrics) RecordAppLoad(appName, status string, duration time.Duration)
- func (m *Metrics) RecordAppUnload(appName string)
- func (m *Metrics) RecordHTTPRequest(method, path string, statusCode int, duration time.Duration, ...)
- func (m *Metrics) RecordLLMCall(model, provider string, duration time.Duration)
- func (m *Metrics) RecordLLMError(model, provider, errorType string)
- func (m *Metrics) RecordLLMTokens(model, provider string, inputTokens, outputTokens int)
- func (m *Metrics) RecordMemoryIndexed(indexType string, count int)
- func (m *Metrics) RecordMemorySearch(indexType string, duration time.Duration)
- func (m *Metrics) RecordNotificationRetry(agentName, notifType string)
- func (m *Metrics) RecordNotificationSent(agentName, notifType, status string, duration time.Duration)
- func (m *Metrics) RecordQueueLatency(appName, agentName string, duration time.Duration)
- func (m *Metrics) RecordQueueProcessingDuration(appName, agentName string, duration time.Duration)
- func (m *Metrics) RecordRAGDocError(storeName string)
- func (m *Metrics) RecordRAGDocIndexed(storeName string, duration time.Duration)
- func (m *Metrics) RecordRAGDocSkipped(storeName string)
- func (m *Metrics) RecordRAGSearch(storeName string, duration time.Duration, resultCount int)
- func (m *Metrics) RecordSchedulerError(appName, agentName, errorType string)
- func (m *Metrics) RecordSchedulerTrigger(appName, agentName string)
- func (m *Metrics) RecordSessionCreated(appName string)
- func (m *Metrics) RecordSessionEvent(appName, eventType string)
- func (m *Metrics) RecordToolCall(toolName string, duration time.Duration)
- func (m *Metrics) RecordToolError(toolName, errorType string)
- func (m *Metrics) Registry() *prometheus.Registry
- func (m *Metrics) SetQueueDepth(appName, status string, count float64)
- func (m *Metrics) SetSessionsActive(appName string, count int)
- type MetricsConfig
- type NoopMetrics
- func (NoopMetrics) DecAgentActiveRuns(_ string)
- func (NoopMetrics) Handler() http.Handler
- func (NoopMetrics) IncAgentActiveRuns(_ string)
- func (NoopMetrics) RecordAgentCall(_, _ string, _ time.Duration)
- func (NoopMetrics) RecordAgentError(_, _, _ string)
- func (NoopMetrics) RecordHTTPRequest(_, _ string, _ int, _ time.Duration, _, _ int64)
- func (NoopMetrics) RecordLLMCall(_, _ string, _ time.Duration)
- func (NoopMetrics) RecordLLMError(_, _, _ string)
- func (NoopMetrics) RecordLLMTokens(_, _ string, _, _ int)
- func (NoopMetrics) RecordMemoryIndexed(_ string, _ int)
- func (NoopMetrics) RecordMemorySearch(_ string, _ time.Duration)
- func (NoopMetrics) RecordRAGDocError(_ string)
- func (NoopMetrics) RecordRAGDocIndexed(_ string, _ time.Duration)
- func (NoopMetrics) RecordRAGDocSkipped(_ string)
- func (NoopMetrics) RecordRAGSearch(_ string, _ time.Duration, _ int)
- func (NoopMetrics) RecordSessionCreated(_ string)
- func (NoopMetrics) RecordSessionEvent(_, _ string)
- func (NoopMetrics) RecordToolCall(_ string, _ time.Duration)
- func (NoopMetrics) RecordToolError(_, _ string)
- func (NoopMetrics) SetSessionsActive(_ string, _ int)
- type NoopTracer
- func (NoopTracer) AddLLMFinishReason(_ trace.Span, _ string)
- func (NoopTracer) AddLLMUsage(_ trace.Span, _, _ int)
- func (NoopTracer) AddPayload(_ trace.Span, _, _ string)
- func (NoopTracer) AddToolPayload(_ trace.Span, _, _ string)
- func (NoopTracer) DebugExporter() *DebugExporter
- func (NoopTracer) RecordError(_ trace.Span, _ error)
- func (NoopTracer) Shutdown(_ context.Context) error
- func (NoopTracer) Start(ctx context.Context, _ string, _ ...trace.SpanStartOption) (context.Context, trace.Span)
- func (NoopTracer) StartAgentRun(ctx context.Context, _, _, _, _, _ string) (context.Context, trace.Span)
- func (NoopTracer) StartLLMCall(ctx context.Context, _ string, _ int, _, _ float64) (context.Context, trace.Span)
- func (NoopTracer) StartMemorySearch(ctx context.Context, _ string, _ int) (context.Context, trace.Span)
- func (NoopTracer) StartToolExecution(ctx context.Context, _, _, _ string) (context.Context, trace.Span)
- type Recorder
- type SpanEvent
- type Tracer
- func (t *Tracer) AddLLMFinishReason(span trace.Span, reason string)
- func (t *Tracer) AddLLMUsage(span trace.Span, inputTokens, outputTokens int)
- func (t *Tracer) AddPayload(span trace.Span, request, response string)
- func (t *Tracer) AddRAGIndexStats(span trace.Span, chunkCount int)
- func (t *Tracer) AddRAGSearchResults(span trace.Span, resultCount int)
- func (t *Tracer) AddToolPayload(span trace.Span, args, response string)
- func (t *Tracer) DebugExporter() *DebugExporter
- func (t *Tracer) RecordError(span trace.Span, err error)
- func (t *Tracer) Shutdown(ctx context.Context) error
- func (t *Tracer) Start(ctx context.Context, spanName string, opts ...trace.SpanStartOption) (context.Context, trace.Span)
- func (t *Tracer) StartAgentRun(ctx context.Context, ...) (context.Context, trace.Span)
- func (t *Tracer) StartLLMCall(ctx context.Context, model string, maxTokens int, temperature, topP float64) (context.Context, trace.Span)
- func (t *Tracer) StartMemorySearch(ctx context.Context, query string, limit int) (context.Context, trace.Span)
- func (t *Tracer) StartRAGChunk(ctx context.Context, strategy string, documentSize int) (context.Context, trace.Span)
- func (t *Tracer) StartRAGEmbed(ctx context.Context, model string, textLength int) (context.Context, trace.Span)
- func (t *Tracer) StartRAGHyDE(ctx context.Context, query string) (context.Context, trace.Span)
- func (t *Tracer) StartRAGIndex(ctx context.Context, storeName, sourceType string, documentCount int) (context.Context, trace.Span)
- func (t *Tracer) StartRAGRerank(ctx context.Context, inputCount int) (context.Context, trace.Span)
- func (t *Tracer) StartRAGSearch(ctx context.Context, storeName, query string, topK int, ...) (context.Context, trace.Span)
- func (t *Tracer) StartToolExecution(ctx context.Context, toolName, toolDescription, callID string) (context.Context, trace.Span)
- type TracerOption
- type TracingConfig
Constants ¶
const ( // AttrServiceName is the logical name of the service. AttrServiceName = "service.name" // AttrServiceVersion is the version of the service. AttrServiceVersion = "service.version" // AttrServiceInstance is the instance ID of the service. AttrServiceInstance = "service.instance.id" )
const ( // AttrGenAISystem identifies the GenAI system (e.g., "hector", "openai"). AttrGenAISystem = "gen_ai.system" // AttrGenAIOperationName is the operation being performed. // Values: "chat", "text_completion", "embeddings" AttrGenAIOperationName = "gen_ai.operation.name" // AttrGenAIRequestModel is the name of the model being used. AttrGenAIRequestModel = "gen_ai.request.model" // AttrGenAIRequestTemperature is the temperature parameter. AttrGenAIRequestTemperature = "gen_ai.request.temperature" // AttrGenAIRequestTopP is the top_p parameter. AttrGenAIRequestTopP = "gen_ai.request.top_p" // AttrGenAIRequestMaxTokens is the maximum tokens requested. AttrGenAIRequestMaxTokens = "gen_ai.request.max_tokens" // AttrGenAIResponseFinishReason is why generation stopped. // Values: "stop", "length", "tool_calls", "content_filter" AttrGenAIResponseFinishReason = "gen_ai.response.finish_reason" // AttrGenAIUsageInputTokens is the number of input tokens. AttrGenAIUsageInputTokens = "gen_ai.usage.input_tokens" // AttrGenAIUsageOutputTokens is the number of output tokens. AttrGenAIUsageOutputTokens = "gen_ai.usage.output_tokens" // AttrGenAIToolName is the name of the tool being called. AttrGenAIToolName = "gen_ai.tool.name" // AttrGenAIToolDescription is the description of the tool. AttrGenAIToolDescription = "gen_ai.tool.description" // AttrGenAIToolCallID is the unique ID of the tool call. AttrGenAIToolCallID = "gen_ai.tool.call.id" )
const ( // AttrHectorAgentName is the name of the agent. AttrHectorAgentName = "hector.agent.name" // AttrHectorAgentType is the type of agent (llm, workflow, remote). AttrHectorAgentType = "hector.agent.type" // AttrHectorInvocationID is the unique ID for this agent invocation. AttrHectorInvocationID = "hector.invocation_id" // AttrHectorSessionID is the session ID. AttrHectorSessionID = "hector.session_id" // AttrHectorUserID is the user ID. AttrHectorUserID = "hector.user_id" // AttrHectorEventID is the event ID within a session. AttrHectorEventID = "hector.event_id" // AttrHectorLLMRequest is the serialized LLM request (optional, for debugging). AttrHectorLLMRequest = "hector.llm.request" // AttrHectorLLMResponse is the serialized LLM response (optional, for debugging). AttrHectorLLMResponse = "hector.llm.response" // AttrHectorToolArgs is the serialized tool arguments (optional, for debugging). AttrHectorToolArgs = "hector.tool.args" // AttrHectorToolResponse is the serialized tool response (optional, for debugging). AttrHectorToolResponse = "hector.tool.response" // AttrHectorThinkingBlocks is the number of thinking blocks in response. AttrHectorThinkingBlocks = "hector.llm.thinking.blocks" // AttrHectorThinkingLength is the total length of thinking content (chars). AttrHectorThinkingLength = "hector.llm.thinking.length" )
const ( // AttrHTTPMethod is the HTTP method. AttrHTTPMethod = "http.method" // AttrHTTPPath is the HTTP path (route pattern, not raw path). AttrHTTPPath = "http.route" // AttrHTTPStatusCode is the HTTP response status code. AttrHTTPStatusCode = "http.status_code" // AttrHTTPRequestSize is the request body size in bytes. AttrHTTPRequestSize = "http.request.body.size" // AttrHTTPResponseSize is the response body size in bytes. AttrHTTPResponseSize = "http.response.body.size" )
const ( // AttrErrorType is the type of error that occurred. AttrErrorType = "error.type" // AttrErrorMessage is the error message. AttrErrorMessage = "error.message" )
const ( // AttrRAGStoreName is the name of the document store. AttrRAGStoreName = "hector.rag.store.name" // AttrRAGQuery is the search query. AttrRAGQuery = "hector.rag.query" // AttrRAGResultCount is the number of search results. AttrRAGResultCount = "hector.rag.result_count" // AttrRAGTopK is the requested number of results. AttrRAGTopK = "hector.rag.top_k" // AttrRAGDocumentCount is the number of documents indexed. AttrRAGDocumentCount = "hector.rag.document_count" // AttrRAGChunkCount is the number of chunks indexed. AttrRAGChunkCount = "hector.rag.chunk_count" // AttrRAGSourceType is the data source type (directory, sql, api). AttrRAGSourceType = "hector.rag.source_type" // AttrRAGChunkerStrategy is the chunking strategy used. AttrRAGChunkerStrategy = "hector.rag.chunker_strategy" // AttrRAGHyDEEnabled indicates if HyDE was used. AttrRAGHyDEEnabled = "hector.rag.hyde_enabled" // AttrRAGRerankEnabled indicates if reranking was used. AttrRAGRerankEnabled = "hector.rag.rerank_enabled" // AttrRAGMultiQueryEnabled indicates if multi-query was used. AttrRAGMultiQueryEnabled = "hector.rag.multiquery_enabled" // AttrRAGEmbeddingModel is the embedding model used. AttrRAGEmbeddingModel = "hector.rag.embedding_model" // AttrRAGVectorProvider is the vector database provider. AttrRAGVectorProvider = "hector.rag.vector_provider" )
const ( // SpanAgentRun is the top-level span for an agent invocation. SpanAgentRun = "hector.agent.run" // SpanLLMCall is a span for an LLM API call. SpanLLMCall = "hector.llm.call" // SpanToolExecution is a span for tool execution. SpanToolExecution = "hector.tool.execute" // SpanMemorySearch is a span for memory/index search. SpanMemorySearch = "hector.memory.search" // SpanSessionLoad is a span for loading session data. SpanSessionLoad = "hector.session.load" // SpanHTTPRequest is a span for HTTP request handling. SpanHTTPRequest = "hector.http.request" // SpanRAGSearch is a span for RAG search operations. SpanRAGSearch = "hector.rag.search" // SpanRAGIndex is a span for RAG indexing operations. SpanRAGIndex = "hector.rag.index" // SpanRAGEmbed is a span for embedding generation. SpanRAGEmbed = "hector.rag.embed" // SpanRAGChunk is a span for document chunking. SpanRAGChunk = "hector.rag.chunk" // SpanRAGRerank is a span for result reranking. SpanRAGRerank = "hector.rag.rerank" // SpanRAGHyDE is a span for HyDE hypothetical document generation. SpanRAGHyDE = "hector.rag.hyde" )
const ( // DefaultServiceName is the default service name for tracing. DefaultServiceName = "hector" // DefaultSamplingRate is the default trace sampling rate. DefaultSamplingRate = 1.0 // DefaultOTLPEndpoint is the default OTLP endpoint. DefaultOTLPEndpoint = "localhost:4317" // DefaultMetricsPath is the default Prometheus metrics endpoint. DefaultMetricsPath = "/metrics" )
const ( // OpChat is a chat completion operation. OpChat = "chat" // OpTextCompletion is a text completion operation. OpTextCompletion = "text_completion" // OpEmbeddings is an embeddings generation operation. OpEmbeddings = "embeddings" // OpToolCall is a tool execution operation. OpToolCall = "execute_tool" )
Variables ¶
This section is empty.
Functions ¶
func HTTPMiddleware ¶
HTTPMiddleware creates HTTP middleware that records both traces and metrics.
func MetricsMiddleware ¶
MetricsMiddleware creates HTTP middleware that only records metrics.
Types ¶
type Config ¶
type Config struct {
// Tracing configures OpenTelemetry distributed tracing.
Tracing TracingConfig `yaml:"tracing,omitempty"`
// Metrics configures Prometheus metrics collection.
Metrics MetricsConfig `yaml:"metrics,omitempty"`
}
Config configures the observability system.
func (*Config) SetDefaults ¶
func (c *Config) SetDefaults()
SetDefaults applies default values to Config.
type DebugExporter ¶
type DebugExporter struct {
// contains filtered or unexported fields
}
DebugExporter is a custom SpanExporter that stores span data in memory for web UI inspection and debugging.
Inspired by adk-go's APIServerSpanExporter, this exporter captures relevant span attributes (LLM calls, tool executions, agent runs) keyed by event ID for easy lookup.
Thread-safe for concurrent reads and writes.
func NewDebugExporter ¶
func NewDebugExporter() *DebugExporter
NewDebugExporter creates a new DebugExporter.
func (*DebugExporter) Count ¶
func (e *DebugExporter) Count() int
Count returns the number of captured spans.
func (*DebugExporter) ExportSpans ¶
func (e *DebugExporter) ExportSpans(ctx context.Context, spans []sdktrace.ReadOnlySpan) error
ExportSpans implements sdktrace.SpanExporter. It captures span data for relevant spans (LLM calls, tool executions, etc.).
func (*DebugExporter) GetAllSpans ¶
func (e *DebugExporter) GetAllSpans() []*DebugSpan
GetAllSpans returns all captured spans.
func (*DebugExporter) GetByEventID ¶
func (e *DebugExporter) GetByEventID(eventID string) *DebugSpan
GetByEventID returns a span by its hector.event_id attribute.
func (*DebugExporter) GetSpan ¶
func (e *DebugExporter) GetSpan(spanID string) *DebugSpan
GetSpan returns a span by its span ID.
func (*DebugExporter) GetSpansByName ¶
func (e *DebugExporter) GetSpansByName(name string) []*DebugSpan
GetSpansByName returns all spans with the given name.
func (*DebugExporter) GetSpansByTrace ¶
func (e *DebugExporter) GetSpansByTrace(traceID string) []*DebugSpan
GetSpansByTrace returns all spans for a given trace ID.
func (*DebugExporter) Shutdown ¶
func (e *DebugExporter) Shutdown(ctx context.Context) error
Shutdown implements sdktrace.SpanExporter.
func (*DebugExporter) WithMaxSize ¶
func (e *DebugExporter) WithMaxSize(size int) *DebugExporter
WithMaxSize sets the maximum number of spans to retain.
type DebugSpan ¶
type DebugSpan struct {
TraceID string `json:"trace_id"`
SpanID string `json:"span_id"`
ParentSpanID string `json:"parent_span_id,omitempty"`
Name string `json:"name"`
StartTime int64 `json:"start_time_unix_nano"`
EndTime int64 `json:"end_time_unix_nano"`
DurationMs float64 `json:"duration_ms"`
Attributes map[string]string `json:"attributes"`
Events []SpanEvent `json:"events,omitempty"`
Status string `json:"status"`
StatusMsg string `json:"status_message,omitempty"`
}
DebugSpan contains captured span information for debugging.
type Manager ¶
type Manager struct {
// contains filtered or unexported fields
}
Manager manages the lifecycle of all observability components. It provides a unified interface for initializing, accessing, and shutting down tracing and metrics systems.
func MustNewManager ¶
MustNewManager creates a Manager and panics on error. Useful for initialization in main() when errors are fatal.
func NewFromConfig ¶
NewFromConfig creates a Manager with defaults from a configuration pointer. This is useful when the config might be nil.
func NewManager ¶
NewManager creates a new observability Manager from configuration.
func NoopManager ¶
func NoopManager() *Manager
NoopManager returns a no-operation Manager that does nothing. Use this when observability is completely disabled.
func (*Manager) DebugExporter ¶
func (m *Manager) DebugExporter() *DebugExporter
DebugExporter returns the debug span exporter, or nil if not enabled.
func (*Manager) MetricsEnabled ¶
MetricsEnabled returns whether metrics are enabled.
func (*Manager) MetricsEndpoint ¶
MetricsEndpoint returns the configured metrics endpoint path.
func (*Manager) MetricsHandler ¶
MetricsHandler returns an HTTP handler for the metrics endpoint.
func (*Manager) TracingEnabled ¶
TracingEnabled returns whether tracing is enabled.
type Metrics ¶
type Metrics struct {
// contains filtered or unexported fields
}
Metrics provides Prometheus metrics collection for Hector.
func NewMetrics ¶
func NewMetrics(cfg *MetricsConfig) (*Metrics, error)
NewMetrics creates a new Metrics instance from configuration.
func (*Metrics) DecAgentActiveRuns ¶
DecAgentActiveRuns decrements the active runs counter.
func (*Metrics) IncAgentActiveRuns ¶
IncAgentActiveRuns increments the active runs counter.
func (*Metrics) RecordAgentCall ¶
RecordAgentCall records an agent invocation.
func (*Metrics) RecordAgentError ¶
RecordAgentError records an agent error.
func (*Metrics) RecordAppLoad ¶ added in v1.21.0
func (*Metrics) RecordAppUnload ¶ added in v1.21.0
func (*Metrics) RecordHTTPRequest ¶
func (m *Metrics) RecordHTTPRequest(method, path string, statusCode int, duration time.Duration, reqSize, respSize int64)
RecordHTTPRequest records an HTTP request.
func (*Metrics) RecordLLMCall ¶
RecordLLMCall records an LLM API call.
func (*Metrics) RecordLLMError ¶
RecordLLMError records an LLM error.
func (*Metrics) RecordLLMTokens ¶
RecordLLMTokens records token usage.
func (*Metrics) RecordMemoryIndexed ¶
RecordMemoryIndexed records items being indexed.
func (*Metrics) RecordMemorySearch ¶
RecordMemorySearch records a memory search operation.
func (*Metrics) RecordNotificationRetry ¶ added in v1.21.0
func (*Metrics) RecordNotificationSent ¶ added in v1.21.0
func (*Metrics) RecordQueueLatency ¶ added in v1.21.0
func (*Metrics) RecordQueueProcessingDuration ¶ added in v1.21.0
func (*Metrics) RecordRAGDocError ¶
RecordRAGDocError records a document indexing error.
func (*Metrics) RecordRAGDocIndexed ¶
RecordRAGDocIndexed records a document being indexed.
func (*Metrics) RecordRAGDocSkipped ¶
RecordRAGDocSkipped records a document being skipped.
func (*Metrics) RecordRAGSearch ¶
RecordRAGSearch records a RAG search operation.
func (*Metrics) RecordSchedulerError ¶ added in v1.21.0
func (*Metrics) RecordSchedulerTrigger ¶ added in v1.21.0
func (*Metrics) RecordSessionCreated ¶
RecordSessionCreated records a session creation.
func (*Metrics) RecordSessionEvent ¶
RecordSessionEvent records a session event.
func (*Metrics) RecordToolCall ¶
RecordToolCall records a tool invocation.
func (*Metrics) RecordToolError ¶
RecordToolError records a tool error.
func (*Metrics) Registry ¶
func (m *Metrics) Registry() *prometheus.Registry
Registry returns the Prometheus registry.
func (*Metrics) SetQueueDepth ¶ added in v1.21.0
func (*Metrics) SetSessionsActive ¶
SetSessionsActive sets the number of active sessions.
type MetricsConfig ¶
type MetricsConfig struct {
// Enabled turns on metrics collection.
// Default: false
Enabled bool `yaml:"enabled,omitempty"`
// Endpoint is the path to expose metrics on.
// Default: "/metrics"
Endpoint string `yaml:"endpoint,omitempty"`
// Namespace prefixes all metric names.
// Default: "hector"
Namespace string `yaml:"namespace,omitempty"`
// Subsystem is added between namespace and metric name.
// Example: With namespace="hector" and subsystem="agent":
// metric name becomes "hector_agent_calls_total"
Subsystem string `yaml:"subsystem,omitempty"`
// ConstLabels are labels added to all metrics.
ConstLabels map[string]string `yaml:"const_labels,omitempty"`
}
MetricsConfig configures Prometheus metrics.
func (*MetricsConfig) SetDefaults ¶
func (c *MetricsConfig) SetDefaults()
SetDefaults applies default values to MetricsConfig.
func (*MetricsConfig) Validate ¶
func (c *MetricsConfig) Validate() error
Validate checks MetricsConfig for errors.
type NoopMetrics ¶
type NoopMetrics struct{}
NoopMetrics is a metrics implementation that does nothing.
func (NoopMetrics) DecAgentActiveRuns ¶
func (NoopMetrics) DecAgentActiveRuns(_ string)
func (NoopMetrics) Handler ¶
func (NoopMetrics) Handler() http.Handler
Handler returns a handler that returns 503 Service Unavailable.
func (NoopMetrics) IncAgentActiveRuns ¶
func (NoopMetrics) IncAgentActiveRuns(_ string)
func (NoopMetrics) RecordAgentCall ¶
func (NoopMetrics) RecordAgentCall(_, _ string, _ time.Duration)
RecordAgentCall is a no-op (agent metrics).
func (NoopMetrics) RecordAgentError ¶
func (NoopMetrics) RecordAgentError(_, _, _ string)
func (NoopMetrics) RecordHTTPRequest ¶
RecordHTTPRequest is a no-op (HTTP metrics).
func (NoopMetrics) RecordLLMCall ¶
func (NoopMetrics) RecordLLMCall(_, _ string, _ time.Duration)
RecordLLMCall is a no-op (LLM metrics).
func (NoopMetrics) RecordLLMError ¶
func (NoopMetrics) RecordLLMError(_, _, _ string)
func (NoopMetrics) RecordLLMTokens ¶
func (NoopMetrics) RecordLLMTokens(_, _ string, _, _ int)
func (NoopMetrics) RecordMemoryIndexed ¶
func (NoopMetrics) RecordMemoryIndexed(_ string, _ int)
func (NoopMetrics) RecordMemorySearch ¶
func (NoopMetrics) RecordMemorySearch(_ string, _ time.Duration)
RecordMemorySearch is a no-op (memory metrics).
func (NoopMetrics) RecordRAGDocError ¶
func (NoopMetrics) RecordRAGDocError(_ string)
func (NoopMetrics) RecordRAGDocIndexed ¶
func (NoopMetrics) RecordRAGDocIndexed(_ string, _ time.Duration)
RecordRAGDocIndexed is a no-op (RAG metrics).
func (NoopMetrics) RecordRAGDocSkipped ¶
func (NoopMetrics) RecordRAGDocSkipped(_ string)
func (NoopMetrics) RecordRAGSearch ¶
func (NoopMetrics) RecordRAGSearch(_ string, _ time.Duration, _ int)
func (NoopMetrics) RecordSessionCreated ¶
func (NoopMetrics) RecordSessionCreated(_ string)
RecordSessionCreated is a no-op (session metrics).
func (NoopMetrics) RecordSessionEvent ¶
func (NoopMetrics) RecordSessionEvent(_, _ string)
func (NoopMetrics) RecordToolCall ¶
func (NoopMetrics) RecordToolCall(_ string, _ time.Duration)
RecordToolCall is a no-op (tool metrics).
func (NoopMetrics) RecordToolError ¶
func (NoopMetrics) RecordToolError(_, _ string)
func (NoopMetrics) SetSessionsActive ¶
func (NoopMetrics) SetSessionsActive(_ string, _ int)
type NoopTracer ¶
type NoopTracer struct{}
NoopTracer is a tracer implementation that does nothing.
func (NoopTracer) AddLLMFinishReason ¶
func (NoopTracer) AddLLMFinishReason(_ trace.Span, _ string)
AddLLMFinishReason is a no-op.
func (NoopTracer) AddLLMUsage ¶
func (NoopTracer) AddLLMUsage(_ trace.Span, _, _ int)
AddLLMUsage is a no-op.
func (NoopTracer) AddPayload ¶
func (NoopTracer) AddPayload(_ trace.Span, _, _ string)
AddPayload is a no-op.
func (NoopTracer) AddToolPayload ¶
func (NoopTracer) AddToolPayload(_ trace.Span, _, _ string)
AddToolPayload is a no-op.
func (NoopTracer) DebugExporter ¶
func (NoopTracer) DebugExporter() *DebugExporter
DebugExporter returns nil.
func (NoopTracer) RecordError ¶
func (NoopTracer) RecordError(_ trace.Span, _ error)
RecordError is a no-op.
func (NoopTracer) Shutdown ¶
func (NoopTracer) Shutdown(_ context.Context) error
Shutdown is a no-op.
func (NoopTracer) Start ¶
func (NoopTracer) Start(ctx context.Context, _ string, _ ...trace.SpanStartOption) (context.Context, trace.Span)
Start returns a no-op span.
func (NoopTracer) StartAgentRun ¶
func (NoopTracer) StartAgentRun(ctx context.Context, _, _, _, _, _ string) (context.Context, trace.Span)
StartAgentRun returns a no-op span.
func (NoopTracer) StartLLMCall ¶
func (NoopTracer) StartLLMCall(ctx context.Context, _ string, _ int, _, _ float64) (context.Context, trace.Span)
StartLLMCall returns a no-op span.
func (NoopTracer) StartMemorySearch ¶
func (NoopTracer) StartMemorySearch(ctx context.Context, _ string, _ int) (context.Context, trace.Span)
StartMemorySearch returns a no-op span.
func (NoopTracer) StartToolExecution ¶
func (NoopTracer) StartToolExecution(ctx context.Context, _, _, _ string) (context.Context, trace.Span)
StartToolExecution returns a no-op span.
type Recorder ¶
type Recorder interface {
// Agent metrics
RecordAgentCall(agentName, agentType string, duration time.Duration)
RecordAgentError(agentName, agentType, errorType string)
IncAgentActiveRuns(agentName string)
DecAgentActiveRuns(agentName string)
// LLM metrics
RecordLLMCall(model, provider string, duration time.Duration)
RecordLLMTokens(model, provider string, inputTokens, outputTokens int)
RecordLLMError(model, provider, errorType string)
// Tool metrics
RecordToolCall(toolName string, duration time.Duration)
RecordToolError(toolName, errorType string)
// Memory metrics
RecordMemorySearch(indexType string, duration time.Duration)
RecordMemoryIndexed(indexType string, count int)
// Session metrics
RecordSessionCreated(appName string)
SetSessionsActive(appName string, count int)
RecordSessionEvent(appName, eventType string)
// HTTP metrics
RecordHTTPRequest(method, path string, statusCode int, duration time.Duration, reqSize, respSize int64)
// RAG metrics
RecordRAGDocIndexed(storeName string, duration time.Duration)
RecordRAGDocSkipped(storeName string)
RecordRAGDocError(storeName string)
RecordRAGSearch(storeName string, duration time.Duration, resultCount int)
}
Recorder defines the interface for recording metrics. This allows for dependency injection and easier testing.
type SpanEvent ¶
type SpanEvent struct {
Name string `json:"name"`
TimeUnix int64 `json:"time_unix_nano"`
Attributes map[string]string `json:"attributes,omitempty"`
}
SpanEvent represents an event recorded on a span.
type Tracer ¶
type Tracer struct {
// contains filtered or unexported fields
}
Tracer wraps the OpenTelemetry tracer with Hector-specific helpers.
func NewTracer ¶
func NewTracer(ctx context.Context, cfg *TracingConfig, opts ...TracerOption) (*Tracer, error)
NewTracer creates a new Tracer from configuration.
func (*Tracer) AddLLMFinishReason ¶
AddLLMFinishReason adds the finish reason to a span.
func (*Tracer) AddLLMUsage ¶
AddLLMUsage adds token usage information to a span.
func (*Tracer) AddPayload ¶
AddPayload adds serialized request/response to a span (if capture is enabled).
func (*Tracer) AddRAGIndexStats ¶
AddRAGIndexStats adds indexing statistics to a span.
func (*Tracer) AddRAGSearchResults ¶
AddRAGSearchResults adds search result count to a span.
func (*Tracer) AddToolPayload ¶
AddToolPayload adds serialized tool args/response to a span (if capture is enabled).
func (*Tracer) DebugExporter ¶
func (t *Tracer) DebugExporter() *DebugExporter
DebugExporter returns the debug exporter if configured.
func (*Tracer) RecordError ¶
RecordError records an error on a span.
func (*Tracer) Start ¶
func (t *Tracer) Start(ctx context.Context, spanName string, opts ...trace.SpanStartOption) (context.Context, trace.Span)
Start begins a new span with the given name.
func (*Tracer) StartAgentRun ¶
func (t *Tracer) StartAgentRun(ctx context.Context, agentName, agentType, sessionID, userID, invocationID string) (context.Context, trace.Span)
StartAgentRun begins a span for an agent invocation.
func (*Tracer) StartLLMCall ¶
func (t *Tracer) StartLLMCall(ctx context.Context, model string, maxTokens int, temperature, topP float64) (context.Context, trace.Span)
StartLLMCall begins a span for an LLM API call.
func (*Tracer) StartMemorySearch ¶
func (t *Tracer) StartMemorySearch(ctx context.Context, query string, limit int) (context.Context, trace.Span)
StartMemorySearch begins a span for memory search operations.
func (*Tracer) StartRAGChunk ¶
func (t *Tracer) StartRAGChunk(ctx context.Context, strategy string, documentSize int) (context.Context, trace.Span)
StartRAGChunk begins a span for document chunking.
func (*Tracer) StartRAGEmbed ¶
func (t *Tracer) StartRAGEmbed(ctx context.Context, model string, textLength int) (context.Context, trace.Span)
StartRAGEmbed begins a span for embedding generation.
func (*Tracer) StartRAGHyDE ¶
StartRAGHyDE begins a span for HyDE hypothetical document generation.
func (*Tracer) StartRAGIndex ¶
func (t *Tracer) StartRAGIndex(ctx context.Context, storeName, sourceType string, documentCount int) (context.Context, trace.Span)
StartRAGIndex begins a span for RAG indexing operations.
func (*Tracer) StartRAGRerank ¶
StartRAGRerank begins a span for result reranking.
type TracerOption ¶
type TracerOption func(*Tracer)
TracerOption configures the Tracer.
func WithCapturePayloads ¶
func WithCapturePayloads(capture bool) TracerOption
WithCapturePayloads sets whether full LLM request/response payloads are captured in spans.
func WithDebugExporter ¶
func WithDebugExporter(exporter *DebugExporter) TracerOption
WithDebugExporter adds a debug exporter for web UI inspection.
type TracingConfig ¶
type TracingConfig struct {
// Enabled turns on distributed tracing.
// Default: false
Enabled bool `yaml:"enabled,omitempty"`
// Exporter specifies the trace exporter type.
// Values: "otlp" (default), "jaeger", "zipkin", "stdout"
Exporter string `yaml:"exporter,omitempty"`
// Endpoint is the collector endpoint.
// For OTLP: "localhost:4317" (gRPC) or "localhost:4318" (HTTP)
// For Jaeger: "http://localhost:14268/api/traces"
// For Zipkin: "http://localhost:9411/api/v2/spans"
Endpoint string `yaml:"endpoint,omitempty"`
// SamplingRate controls what fraction of traces are sampled.
// Range: 0.0 (none) to 1.0 (all)
// Default: 1.0
SamplingRate float64 `yaml:"sampling_rate,omitempty"`
// ServiceName identifies this service in traces.
// Default: "hector"
ServiceName string `yaml:"service_name,omitempty"`
// ServiceVersion is the version of this service.
ServiceVersion string `yaml:"service_version,omitempty"`
// Insecure disables TLS for the exporter connection.
// Default: true (for local development)
Insecure *bool `yaml:"insecure,omitempty"`
// Headers are additional headers to send with export requests.
Headers map[string]string `yaml:"headers,omitempty"`
// CapturePayloads enables capturing full LLM request/response in spans.
// Warning: This can produce large spans. Use only for debugging.
// Default: false
CapturePayloads bool `yaml:"capture_payloads,omitempty"`
// DebugExporter enables the in-memory span exporter for web UI.
// Default: true (when tracing is enabled)
DebugExporter *bool `yaml:"debug_exporter,omitempty"`
// Timeout for exporter operations.
// Default: 10s
Timeout time.Duration `yaml:"timeout,omitempty"`
}
TracingConfig configures OpenTelemetry tracing.
func (*TracingConfig) IsDebugExporterEnabled ¶
func (c *TracingConfig) IsDebugExporterEnabled() bool
IsDebugExporterEnabled returns whether the debug exporter should be enabled.
func (*TracingConfig) IsInsecure ¶
func (c *TracingConfig) IsInsecure() bool
IsInsecure returns whether to use insecure connection.
func (*TracingConfig) SetDefaults ¶
func (c *TracingConfig) SetDefaults()
SetDefaults applies default values to TracingConfig.
func (*TracingConfig) Validate ¶
func (c *TracingConfig) Validate() error
Validate checks TracingConfig for errors.