Documentation
¶
Overview ¶
Package telemetry defines the telemetry subsystem for metrics, logs, traces, and resource snapshots. It includes the Collector interface for pluggable telemetry sources and the Service/Store interfaces for aggregation and persistence.
Index ¶
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type Collector ¶
// Collector is the interface implemented by a pluggable telemetry source.
// Both collection methods are scoped to a single instance and take a
// context.Context as their first argument.
type Collector interface {
// Name identifies this collector.
Name() string
// CollectMetrics gathers current metrics for an instance.
// It returns the gathered data points, or a non-nil error.
CollectMetrics(ctx context.Context, instanceID id.ID) ([]Metric, error)
// CollectResources gathers a resource snapshot for an instance.
// It returns a pointer to the snapshot, or a non-nil error.
CollectResources(ctx context.Context, instanceID id.ID) (*ResourceSnapshot, error)
}
Collector gathers telemetry from a provider or external source. Implement it to plug in a custom telemetry source.
type DashboardData ¶
// DashboardData is a pre-aggregated view of instance telemetry.
// All fields carry JSON tags only; there are no db tags on this type.
type DashboardData struct {
// InstanceID is the instance this view describes.
InstanceID id.ID `json:"instance_id"`
// Resources points to a resource snapshot for the instance.
// NOTE(review): presumably nil when no snapshot is available; confirm.
Resources *ResourceSnapshot `json:"resources"`
// HealthStatus is a free-form string; the set of valid values is not
// declared in this package listing.
HealthStatus string `json:"health_status"`
// UptimePercent is an uptime percentage.
// NOTE(review): measurement window and scale (0-100 vs 0-1) are not
// visible here; confirm.
UptimePercent float64 `json:"uptime_percent"`
// RequestRate is a per-second rate, per its JSON key.
RequestRate float64 `json:"request_rate_per_sec"`
// ErrorRate is a per-second rate, per its JSON key.
ErrorRate float64 `json:"error_rate_per_sec"`
// AvgLatency is the average latency. When marshaled with encoding/json a
// time.Duration is emitted as an integer nanosecond count.
AvgLatency time.Duration `json:"avg_latency"`
// P99Latency is the 99th-percentile latency (same JSON encoding note as
// AvgLatency).
P99Latency time.Duration `json:"p99_latency"`
// RecentDeploys is a deploy count; the JSON key indicates a 24h window.
RecentDeploys int `json:"recent_deploys_24h"`
// ActiveAlerts is a count of active alerts.
ActiveAlerts int `json:"active_alerts"`
}
DashboardData is a pre-aggregated view of instance telemetry.
type LogEntry ¶
// LogEntry is a structured log line from an instance.
// Each field carries both a db tag and a JSON tag with the same key.
type LogEntry struct {
// InstanceID is the instance the log line came from.
InstanceID id.ID `db:"instance_id" json:"instance_id"`
// TenantID is the owning tenant, as a plain string.
TenantID string `db:"tenant_id" json:"tenant_id"`
// Level is the log level as a string; valid values are not declared here.
Level string `db:"level" json:"level"`
// Message is the log message text.
Message string `db:"message" json:"message"`
// Fields holds arbitrary structured key/value data; omitted from JSON
// when empty.
Fields map[string]any `db:"fields" json:"fields,omitempty"`
// Source is a string naming where the entry came from.
// NOTE(review): exact meaning (collector name, stream, file) is not
// visible here; confirm.
Source string `db:"source" json:"source"`
// Timestamp is the time associated with the entry.
Timestamp time.Time `db:"timestamp" json:"timestamp"`
}
LogEntry is a structured log line from an instance.
type LogQuery ¶
// LogQuery configures a log query.
type LogQuery struct {
// InstanceID selects the instance whose logs are queried.
InstanceID id.ID `json:"instance_id"`
// Level is an optional level filter; omitted from JSON when empty.
Level string `json:"level,omitempty"`
// Search is an optional search string; omitted from JSON when empty.
// NOTE(review): matching semantics (substring, regex, full-text) are not
// visible here; confirm.
Search string `json:"search,omitempty"`
// Since and Until bound the time range of the query.
// NOTE(review): whether the bounds are inclusive, and how zero values are
// treated, is not visible here; confirm.
Since time.Time `json:"since"`
Until time.Time `json:"until"`
// Limit is omitted from JSON when zero.
// NOTE(review): presumably caps the number of returned entries; confirm.
Limit int `json:"limit,omitempty"`
}
LogQuery configures a log query.
type Metric ¶
// Metric is a single metric data point.
// Each field carries both a db tag and a JSON tag with the same key.
type Metric struct {
// InstanceID is the instance the data point belongs to.
InstanceID id.ID `db:"instance_id" json:"instance_id"`
// TenantID is the owning tenant, as a plain string.
TenantID string `db:"tenant_id" json:"tenant_id"`
// Name is the metric name.
Name string `db:"name" json:"name"`
// Type classifies the metric (see MetricType).
Type MetricType `db:"type" json:"type"`
// Value is the numeric value of the data point.
Value float64 `db:"value" json:"value"`
// Labels holds string key/value labels; omitted from JSON when empty.
Labels map[string]string `db:"labels" json:"labels,omitempty"`
// Timestamp is the time associated with the data point.
Timestamp time.Time `db:"timestamp" json:"timestamp"`
}
Metric is a single metric data point.
type MetricQuery ¶
// MetricQuery configures a metrics query.
type MetricQuery struct {
// InstanceID selects the instance whose metrics are queried.
InstanceID id.ID `json:"instance_id"`
// Name is an optional metric-name filter; omitted from JSON when empty.
Name string `json:"name,omitempty"`
// Since and Until bound the time range of the query.
// NOTE(review): bound inclusivity and zero-value handling are not visible
// here; confirm.
Since time.Time `json:"since"`
Until time.Time `json:"until"`
// Step is omitted from JSON when zero; when present, encoding/json emits
// a time.Duration as an integer nanosecond count.
// NOTE(review): presumably the resolution/bucket width of the returned
// series; confirm.
Step time.Duration `json:"step,omitempty"`
// Limit is omitted from JSON when zero.
// NOTE(review): presumably caps the number of returned points; confirm.
Limit int `json:"limit,omitempty"`
}
MetricQuery configures a metrics query.
type MetricType ¶
// MetricType classifies a metric. It is a string-backed type; the values
// declared for it in this package are MetricGauge, MetricCounter, and
// MetricHist.
type MetricType string
MetricType classifies the metric.
const ( // MetricGauge is a point-in-time value. MetricGauge MetricType = "gauge" // MetricCounter is a monotonically increasing value. MetricCounter MetricType = "counter" // MetricHist is a histogram distribution. MetricHist MetricType = "histogram" )
type ResourceSnapshot ¶
// ResourceSnapshot captures point-in-time resource usage for an instance.
// Each field carries both a db tag and a JSON tag with the same key. Units
// noted below are taken from the field names and tags.
type ResourceSnapshot struct {
// InstanceID is the instance the snapshot describes.
InstanceID id.ID `db:"instance_id" json:"instance_id"`
// TenantID is the owning tenant, as a plain string.
TenantID string `db:"tenant_id" json:"tenant_id"`
// CPUPercent is CPU usage as a percentage.
// NOTE(review): whether this is per-core or normalized across cores is
// not visible here; confirm.
CPUPercent float64 `db:"cpu_percent" json:"cpu_percent"`
// MemoryUsedMB is memory in use, in MB.
MemoryUsedMB int `db:"memory_used_mb" json:"memory_used_mb"`
// MemoryLimitMB is the memory limit, in MB.
MemoryLimitMB int `db:"memory_limit_mb" json:"memory_limit_mb"`
// DiskUsedMB is disk space in use, in MB.
DiskUsedMB int `db:"disk_used_mb" json:"disk_used_mb"`
// NetworkInMB is inbound network traffic, in MB.
// NOTE(review): cumulative vs per-interval is not visible here; confirm.
NetworkInMB float64 `db:"network_in_mb" json:"network_in_mb"`
// NetworkOutMB is outbound network traffic, in MB (same caveat as
// NetworkInMB).
NetworkOutMB float64 `db:"network_out_mb" json:"network_out_mb"`
// Timestamp is the time the snapshot refers to.
Timestamp time.Time `db:"timestamp" json:"timestamp"`
}
ResourceSnapshot captures point-in-time resource usage for an instance.
type Service ¶
// Service manages telemetry collection, storage, and querying. It exposes
// push/query pairs for metrics, logs, and traces, resource and dashboard
// lookups keyed by instance ID, and registration of custom collectors.
type Service interface {
// PushMetrics ingests metric data points.
PushMetrics(ctx context.Context, metrics []Metric) error
// QueryMetrics returns metrics matching the query.
QueryMetrics(ctx context.Context, q MetricQuery) ([]Metric, error)
// PushLogs ingests log entries.
PushLogs(ctx context.Context, logs []LogEntry) error
// QueryLogs returns log entries matching the query.
QueryLogs(ctx context.Context, q LogQuery) ([]LogEntry, error)
// PushTraces ingests trace spans.
PushTraces(ctx context.Context, traces []Trace) error
// QueryTraces returns traces matching the query.
QueryTraces(ctx context.Context, q TraceQuery) ([]Trace, error)
// GetCurrentResources returns the latest resource snapshot for an instance.
// NOTE(review): unlike Store.GetLatestResourceSnapshot this takes no
// tenantID; how the tenant is resolved is not visible here.
GetCurrentResources(ctx context.Context, instanceID id.ID) (*ResourceSnapshot, error)
// GetResourceHistory returns resource snapshots over a time range.
// TimeRange is declared outside this listing.
GetResourceHistory(ctx context.Context, instanceID id.ID, opts TimeRange) ([]ResourceSnapshot, error)
// GetDashboard returns a pre-aggregated view of instance telemetry.
GetDashboard(ctx context.Context, instanceID id.ID) (*DashboardData, error)
// RegisterCollector adds a custom telemetry collector.
// It takes no context and returns no error.
RegisterCollector(collector Collector)
}
Service manages telemetry collection, storage, and querying.
type Store ¶
// Store is the persistence interface for telemetry data. It provides
// insert/query pairs for metrics, logs, and traces, plus insert, latest, and
// list operations for resource snapshots.
type Store interface {
// InsertMetrics persists metric data points.
InsertMetrics(ctx context.Context, metrics []Metric) error
// QueryMetrics returns metrics matching the query parameters.
QueryMetrics(ctx context.Context, q MetricQuery) ([]Metric, error)
// InsertLogs persists log entries.
InsertLogs(ctx context.Context, logs []LogEntry) error
// QueryLogs returns log entries matching the query parameters.
QueryLogs(ctx context.Context, q LogQuery) ([]LogEntry, error)
// InsertTraces persists trace spans.
InsertTraces(ctx context.Context, traces []Trace) error
// QueryTraces returns traces matching the query parameters.
QueryTraces(ctx context.Context, q TraceQuery) ([]Trace, error)
// InsertResourceSnapshot persists a resource snapshot.
// The snapshot is passed by pointer.
InsertResourceSnapshot(ctx context.Context, snap *ResourceSnapshot) error
// GetLatestResourceSnapshot returns the most recent snapshot for an instance.
// The lookup is keyed by both tenantID and instanceID.
// NOTE(review): the result when no snapshot exists (nil, error, or both)
// is not visible here; confirm.
GetLatestResourceSnapshot(ctx context.Context, tenantID string, instanceID id.ID) (*ResourceSnapshot, error)
// ListResourceSnapshots returns snapshots for an instance within a time range.
// The lookup is keyed by both tenantID and instanceID; TimeRange is
// declared outside this listing.
ListResourceSnapshots(ctx context.Context, tenantID string, instanceID id.ID, opts TimeRange) ([]ResourceSnapshot, error)
}
Store is the persistence interface for telemetry data.
type Trace ¶
// Trace represents a distributed trace span.
// Each field carries both a db tag and a JSON tag with the same key.
type Trace struct {
// InstanceID is the instance the span was recorded for.
InstanceID id.ID `db:"instance_id" json:"instance_id"`
// TenantID is the owning tenant, as a plain string.
TenantID string `db:"tenant_id" json:"tenant_id"`
// TraceID is the ID of the trace this span belongs to.
TraceID string `db:"trace_id" json:"trace_id"`
// SpanID is the ID of this span.
SpanID string `db:"span_id" json:"span_id"`
// ParentID is the parent span's ID; omitted from JSON when empty.
// NOTE(review): presumably empty for a root span; confirm.
ParentID string `db:"parent_id" json:"parent_id,omitempty"`
// Operation is the operation name of the span.
Operation string `db:"operation" json:"operation"`
// Duration is the span's duration. When marshaled with encoding/json a
// time.Duration is emitted as an integer nanosecond count.
Duration time.Duration `db:"duration" json:"duration"`
// Status is a free-form string; valid values are not declared here.
Status string `db:"status" json:"status"`
// Attributes holds string key/value attributes; omitted from JSON when
// empty.
Attributes map[string]string `db:"attributes" json:"attributes,omitempty"`
// Timestamp is the time associated with the span.
// NOTE(review): start vs end of the span is not visible here; confirm.
Timestamp time.Time `db:"timestamp" json:"timestamp"`
}
Trace represents a distributed trace span.
type TraceQuery ¶
// TraceQuery configures a trace query.
type TraceQuery struct {
// InstanceID selects the instance whose traces are queried.
InstanceID id.ID `json:"instance_id"`
// TraceID is an optional trace-ID filter; omitted from JSON when empty.
TraceID string `json:"trace_id,omitempty"`
// Operation is an optional operation-name filter; omitted from JSON when
// empty.
Operation string `json:"operation,omitempty"`
// Since and Until bound the time range of the query.
// NOTE(review): bound inclusivity and zero-value handling are not visible
// here; confirm.
Since time.Time `json:"since"`
Until time.Time `json:"until"`
// Limit is omitted from JSON when zero.
// NOTE(review): presumably caps the number of returned spans; confirm.
Limit int `json:"limit,omitempty"`
}
TraceQuery configures a trace query.