Documentation
¶
Overview ¶
Package observability provides a standalone alert repository to avoid import cycles
Package observability provides a comprehensive alerting system with webhook delivery and SNS integration ¶
Package observability provides CloudWatch dashboard configuration and management ¶
Package observability provides DynamORM operation latency tracking ¶
Package observability provides EMF (Embedded Metric Format) metrics for CloudWatch. This implementation follows the AWS EMF specification for serverless environments.
Package observability provides error constants for the observability package ¶
Package observability provides health check endpoints for production monitoring ¶
Package observability provides HTTP client latency tracking for federation and external API calls ¶
Package observability provides latency aggregation and percentile calculation services ¶
Package observability provides real-time latency alerting with configurable thresholds ¶
Package observability provides serverless-friendly metrics collection for AWS Lambda.
Usage in Lambda functions:
- Create a collector during init() or at handler start
- Record metrics during execution
- Call Flush() before handler returns to send metrics to CloudWatch
This implementation avoids background goroutines and polling to be compatible with serverless environments where Lambda containers can be frozen.
Package observability provides monitoring service configuration for alert routing and management ¶
Package observability provides performance optimization guidelines and monitoring ¶
Package observability provides distributed tracing support with AWS X-Ray integration ¶
Package observability provides webhook delivery service for alerts with retry logic and dead letter handling ¶
Package observability provides X-Ray middleware for Lambda functions
Index ¶
- Constants
- Variables
- func AddErrorToTrace(ctx context.Context, err error, remote bool)
- func AddServiceAnnotations(ctx context.Context, serviceName, serviceVersion string, ...)
- func CreateEMFPerformanceMonitoringMiddleware(emfService *EMFMetricsService) apptheory.Middleware
- func ExampleLambdaHandler()
- func GetLatencyContext(ctx context.Context) (operation string, startTime time.Time, ok bool)
- func MetricsMiddleware(collector *EnhancedMetricsCollector) func(next func()) func()
- func RecordRepositoryLatency(ctx context.Context, repository, method string, duration time.Duration, ...)
- func TraceDatabaseOperation(ctx context.Context, operation, tableName string, ...) error
- func TraceFederationCall(ctx context.Context, instance, method, url string, ...) error
- func TraceMediaProcessing(ctx context.Context, mediaType, operation string, ...) error
- func TraceSubsegment(ctx context.Context, name string, operation func(ctx context.Context) error) error
- func ValidateWebhookURL(webhookURL string) error
- func WithLatencyContext(ctx context.Context, operation string) context.Context
- func WrapLambdaFunc(config *XRayConfig, logger *zap.Logger, handlerFunc interface{}) interface{}
- func WrapLambdaHandler(config *XRayConfig, logger *zap.Logger, handler lambda.Handler) lambda.Handler
- type ActionType
- type Alert
- type AlertAction
- type AlertHistory
- type AlertRequest
- type AlertRoute
- type AlertRule
- type AlertSeverity
- type AlertState
- type AlertStats
- type AlertSummary
- type AlertingConfig
- type AlertingSystem
- func (a *AlertingSystem) CheckCost(ctx context.Context, costMicroCents float64)
- func (a *AlertingSystem) CheckErrorRate(ctx context.Context, service string, errorRate float64)
- func (a *AlertingSystem) CheckHealth(ctx context.Context, service string, isHealthy bool, errorMsg string)
- func (a *AlertingSystem) CheckLatency(ctx context.Context, service string, operation string, ...)
- func (a *AlertingSystem) CheckSecurity(ctx context.Context, eventType string, severity string, ...)
- func (a *AlertingSystem) Cleanup(ctx context.Context) error
- func (a *AlertingSystem) GetActiveAlerts(ctx context.Context, limit int) ([]*models.Alert, error)
- func (a *AlertingSystem) GetAlertStats(_ context.Context, _ time.Time) (*AlertStats, error)
- func (a *AlertingSystem) ProcessRetries(ctx context.Context) error
- func (a *AlertingSystem) ResolveAlert(ctx context.Context, alertID string) error
- func (a *AlertingSystem) SendAlert(ctx context.Context, alertReq *AlertRequest) error
- type AnnotationConfig
- type AxisConfig
- type CacheMetrics
- type CapacityMetrics
- type Condition
- type ConditionType
- type DashboardConfig
- func CreateAPIPerformanceDashboard(region, environment string) *DashboardConfig
- func CreateFederationDashboard(region, environment string) *DashboardConfig
- func CreateLesserOverviewDashboard(region, environment string) *DashboardConfig
- func CreateMediaProcessingDashboard(region, environment string) *DashboardConfig
- func GetAllDashboards(region, environment string) []*DashboardConfig
- type DashboardWidget
- type DashboardWidgetProps
- type DefaultMetricsRecorder
- type DynamORMMetrics
- func (dm *DynamORMMetrics) TrackBatch(ctx context.Context, repository string, operation string, count int, ...) error
- func (dm *DynamORMMetrics) TrackCreate(ctx context.Context, _ string, fn func() error) error
- func (dm *DynamORMMetrics) TrackDelete(ctx context.Context, _ string, fn func() error) error
- func (dm *DynamORMMetrics) TrackQuery(ctx context.Context, repository, method string, fn func() error) error
- func (dm *DynamORMMetrics) TrackRepositoryMethod(ctx context.Context, repository, method string, fn func() error) error
- func (dm *DynamORMMetrics) TrackUpdate(ctx context.Context, _ string, fn func() error) error
- type DynamORMTracker
- func (t *DynamORMTracker) TrackBatch(ctx context.Context, operation string, table string, count int, ...) error
- func (t *DynamORMTracker) TrackCreate(ctx context.Context, table string, createFn func() error) error
- func (t *DynamORMTracker) TrackDelete(ctx context.Context, table string, deleteFn func() error) error
- func (t *DynamORMTracker) TrackQuery(ctx context.Context, operation string, table string, queryFn func() error) error
- func (t *DynamORMTracker) TrackUpdate(ctx context.Context, table string, updateFn func() error) error
- type EMFBuffer
- type EMFCloudWatchMetrics
- type EMFDimension
- type EMFLog
- type EMFLogEntry
- type EMFMetadata
- type EMFMetric
- type EMFMetricData
- type EMFMetricDefinition
- type EMFMetrics
- func (emf *EMFMetrics) AddDimension(name, value string)
- func (emf *EMFMetrics) Flush()
- func (emf *EMFMetrics) IsEnabled() bool
- func (emf *EMFMetrics) PutMetric(name string, value float64, unit string, dimensions map[string]string)
- func (emf *EMFMetrics) RecordBusinessMetric(metricName string, value float64, unit string, dimensions map[string]string)
- func (emf *EMFMetrics) RecordConcurrency(operation string, activeCount int64)
- func (emf *EMFMetrics) RecordCost(operation string, costUSD float64)
- func (emf *EMFMetrics) RecordError(operation string, errorType string)
- func (emf *EMFMetrics) RecordFederationMetric(operation string, instance string, success bool, latencyMs float64)
- func (emf *EMFMetrics) RecordLatency(operation string, duration time.Duration)
- func (emf *EMFMetrics) RecordQueueDepth(queueName string, depth int64)
- func (emf *EMFMetrics) RecordSuccess(operation string)
- func (emf *EMFMetrics) RecordThroughput(operation string, count int64)
- func (emf *EMFMetrics) SetEnabled(enabled bool)
- func (emf *EMFMetrics) SetProperty(key string, value interface{})
- func (emf *EMFMetrics) StartLatencyTimer(ctx context.Context, operation string) *LatencyMetric
- type EMFMetricsCollector
- func (emc *EMFMetricsCollector) Flush() error
- func (emc *EMFMetricsCollector) GetBufferSize() int
- func (emc *EMFMetricsCollector) RecordCost(operation string, costUSD float64)
- func (emc *EMFMetricsCollector) RecordErrorRate(operation string, errorCount, totalCount int64)
- func (emc *EMFMetricsCollector) RecordLatency(operation string, duration time.Duration)
- func (emc *EMFMetricsCollector) RecordMetric(name string, value float64, unit types.StandardUnit, ...)
- func (emc *EMFMetricsCollector) RecordPerformanceMetrics(metrics *PerformanceMetrics)
- func (emc *EMFMetricsCollector) RecordThroughput(operation string, count int64)
- func (emc *EMFMetricsCollector) RemoveDimension(name string)
- func (emc *EMFMetricsCollector) SetDimension(name, value string)
- func (emc *EMFMetricsCollector) Stop()
- type EMFMetricsService
- func (ems *EMFMetricsService) FlushMetrics() error
- func (ems *EMFMetricsService) RecordBusinessMetrics(metricName string, value float64, unit string, ...)
- func (ems *EMFMetricsService) RecordDynamoDBMetrics(operation, tableName string, duration time.Duration, ...)
- func (ems *EMFMetricsService) RecordRequestMetrics(ctx *apptheory.Context, perfMetrics *PerformanceMetrics, statusCode int, ...)
- func (ems *EMFMetricsService) Stop()
- type EMFPayload
- type EnhancedMetric
- type EnhancedMetricsCollector
- func (c *EnhancedMetricsCollector) GetCurrentMetrics() map[string]*EnhancedMetric
- func (c *EnhancedMetricsCollector) GetErrorRates() ErrorRateMetric
- func (c *EnhancedMetricsCollector) GetLatencyPercentiles() PercentileMetric
- func (c *EnhancedMetricsCollector) GetMetricsJSON() ([]byte, error)
- func (c *EnhancedMetricsCollector) MarkFlushed()
- func (c *EnhancedMetricsCollector) RecordCacheHit(hit bool, cacheType string)
- func (c *EnhancedMetricsCollector) RecordDynamoDBCapacity(operation string, consumedRCU, consumedWCU float64)
- func (c *EnhancedMetricsCollector) RecordError(errorType string, statusCode int)
- func (c *EnhancedMetricsCollector) RecordLatency(operation string, latency time.Duration)
- func (c *EnhancedMetricsCollector) Reset()
- func (c *EnhancedMetricsCollector) ShouldFlush() bool
- type ErrorRateMetric
- type EscalationRule
- type HTTPLatencyTracker
- type HTTPMetrics
- type HTTPTracker
- type HealthCheck
- type HealthChecker
- func (hc *HealthChecker) DetailedHandler(w http.ResponseWriter, r *http.Request)
- func (hc *HealthChecker) LivenessHandler(w http.ResponseWriter, _ *http.Request)
- func (hc *HealthChecker) ReadinessHandler(w http.ResponseWriter, r *http.Request)
- func (hc *HealthChecker) RegisterHealthRoutes(mux *http.ServeMux)
- type HealthConfig
- type HealthResponse
- type HistoricalMetricsReader
- type HorizontalAnnotation
- type LatencyAggregator
- func (la *LatencyAggregator) GetAggregatedStats(service string, timeWindow time.Duration) (map[string]*LatencyStats, error)
- func (la *LatencyAggregator) GetCurrentStats(operation, service string) (*LatencyStats, error)
- func (la *LatencyAggregator) GetLatencyTrend(ctx context.Context, operation, service string, startTime, endTime time.Time, ...) (*LatencyTrend, error)
- func (la *LatencyAggregator) RecordLatency(operation, service string, duration time.Duration)
- func (la *LatencyAggregator) Start()
- func (la *LatencyAggregator) Stop()
- type LatencyAggregatorOption
- func WithAggregateInterval(interval time.Duration) LatencyAggregatorOption
- func WithCloudWatch(_ interface{}, namespace string) LatencyAggregatorOption
- func WithMaxBuckets(maxBuckets int) LatencyAggregatorOption
- func WithMetricsRepository(reader HistoricalMetricsReader) LatencyAggregatorOption
- func WithRetentionPeriod(period time.Duration) LatencyAggregatorOption
- type LatencyAlerter
- func (la *LatencyAlerter) AddRule(rule *AlertRule)
- func (la *LatencyAlerter) CheckLatency(ctx context.Context, operation, service string, latencyMs float64, ...)
- func (la *LatencyAlerter) GetAlertHistory() map[string]*AlertHistory
- func (la *LatencyAlerter) GetAlertRules() map[string]*AlertRule
- func (la *LatencyAlerter) RemoveRule(ruleName string)
- func (la *LatencyAlerter) SetEnabled(enabled bool)
- type LatencyBucket
- type LatencyDataPoint
- type LatencyMetric
- type LatencyStats
- type LatencyTrend
- type MetricBuffer
- type MetricLevel
- type MetricType
- type MetricsCollector
- func (mc *MetricsCollector) Flush()
- func (mc *MetricsCollector) RecordCost(operation string, costUSD float64)
- func (mc *MetricsCollector) RecordErrorRate(operation string, errorCount, totalCount int64)
- func (mc *MetricsCollector) RecordLatency(operation string, duration time.Duration)
- func (mc *MetricsCollector) RecordMetric(name string, value float64, unit types.StandardUnit, ...)
- func (mc *MetricsCollector) RecordPerformanceMetrics(metrics *PerformanceMetrics)
- func (mc *MetricsCollector) RecordThroughput(operation string, count int64)
- type MetricsRecorder
- type MonitoringService
- func (ms *MonitoringService) AddAlertRoute(route *AlertRoute)
- func (ms *MonitoringService) CheckCost(ctx context.Context, costMicroCents float64)
- func (ms *MonitoringService) CheckErrorRate(ctx context.Context, service string, errorRate float64)
- func (ms *MonitoringService) CheckHealth(ctx context.Context, service string, isHealthy bool, errorMsg string)
- func (ms *MonitoringService) CheckLatency(ctx context.Context, service, operation string, ...)
- func (ms *MonitoringService) CheckSecurity(ctx context.Context, eventType, severity string, ...)
- func (ms *MonitoringService) Cleanup(ctx context.Context) error
- func (ms *MonitoringService) GetActiveAlerts(ctx context.Context, limit int) ([]*AlertSummary, error)
- func (ms *MonitoringService) ProcessRetries(ctx context.Context) error
- func (ms *MonitoringService) RemoveAlertRoute(routeName string)
- func (ms *MonitoringService) SendAlert(ctx context.Context, alertReq *AlertRequest) error
- type MonitoringServiceConfig
- type PercentileMetric
- type PerformanceMetric
- type PerformanceMetrics
- type PerformanceOptimization
- func (po *PerformanceOptimization) GetMetrics() map[string]PerformanceMetric
- func (po *PerformanceOptimization) GetPerformanceReport() map[string]interface{}
- func (po *PerformanceOptimization) LogPerformanceSummary()
- func (po *PerformanceOptimization) TrackOperation(operation string, duration time.Duration, businessDuration time.Duration)
- func (po *PerformanceOptimization) ValidatePerformanceTargets() []string
- type ScheduleConfig
- type StandaloneAlertRepository
- func (r *StandaloneAlertRepository) CleanupOldAlerts(ctx context.Context, olderThan time.Duration) (int, error)
- func (r *StandaloneAlertRepository) CreateAlert(ctx context.Context, alert *models.Alert) error
- func (r *StandaloneAlertRepository) GetActiveAlerts(ctx context.Context, limit int) ([]*models.Alert, error)
- func (r *StandaloneAlertRepository) GetAlertsNeedingRetry(ctx context.Context, limit int) ([]*models.Alert, error)
- func (r *StandaloneAlertRepository) GetByID(ctx context.Context, alertID string) (*models.Alert, error)
- func (r *StandaloneAlertRepository) ResolveAlert(ctx context.Context, alertID string) error
- func (r *StandaloneAlertRepository) Update(ctx context.Context, alert *models.Alert) error
- type StandaloneDeadLetterRepository
- type StandaloneWebhookRepository
- func (r *StandaloneWebhookRepository) CreateDelivery(ctx context.Context, delivery *models.WebhookDelivery) error
- func (r *StandaloneWebhookRepository) GetDeliveriesByAlert(ctx context.Context, alertID string, limit int) ([]*models.WebhookDelivery, error)
- func (r *StandaloneWebhookRepository) GetPendingRetries(ctx context.Context, limit int) ([]*models.WebhookDelivery, error)
- func (r *StandaloneWebhookRepository) UpdateDelivery(ctx context.Context, delivery *models.WebhookDelivery) error
- type ThrottleConfig
- type TraceContext
- type TracingConfig
- type TracingManager
- func (tm *TracingManager) AddAnnotation(ctx context.Context, key string, value interface{})
- func (tm *TracingManager) AddError(ctx context.Context, err error, remote bool)
- func (tm *TracingManager) AddMetadata(ctx context.Context, namespace string, data map[string]interface{})
- func (tm *TracingManager) CreateTracingMiddleware() func(next func(ctx context.Context) error) func(ctx context.Context) error
- func (tm *TracingManager) ExtractTraceHeaders(headers map[string]string) *TraceContext
- func (tm *TracingManager) GetTraceContext(ctx context.Context) *TraceContext
- func (tm *TracingManager) InjectTraceHeaders(ctx context.Context, headers map[string]string)
- func (tm *TracingManager) IsEnabled() bool
- func (tm *TracingManager) SetHTTPRequest(ctx context.Context, method, url string, userAgent string, clientIP string)
- func (tm *TracingManager) SetHTTPResponse(ctx context.Context, statusCode int, contentLength int64)
- func (tm *TracingManager) SetUser(ctx context.Context, userID string)
- func (tm *TracingManager) StartSegment(ctx context.Context, name string) (context.Context, *xray.Segment)
- func (tm *TracingManager) StartSubsegment(ctx context.Context, name string) (context.Context, *xray.Segment)
- func (tm *TracingManager) TraceDatabase(ctx context.Context, operation string, tableName string, ...) error
- func (tm *TracingManager) TraceExternalCall(ctx context.Context, serviceName string, method string, url string, ...) error
- func (tm *TracingManager) TraceLambdaFunction(ctx context.Context, functionName string, ...) error
- type TrendAnalysis
- type VerticalAnnotation
- type WebhookConfig
- type WebhookDeliveryConfig
- type WebhookDeliveryService
- type XRayConfig
- type YAxisConfig
Constants ¶
const ( // Latency metrics MetricLatency = "Latency" MetricLatencyP50 = "LatencyP50" MetricLatencyP90 = "LatencyP90" MetricLatencyP99 = "LatencyP99" // Throughput metrics MetricThroughput = "Throughput" MetricRequestsPerSecond = "RequestsPerSecond" // Error metrics MetricErrors = "Errors" MetricErrorRate = "ErrorRate" MetricSuccess = "Success" MetricSuccessRate = "SuccessRate" // Concurrency metrics MetricConcurrency = "Concurrency" MetricActiveConnections = "ActiveConnections" MetricColdStarts = "ColdStarts" MetricColdStartDuration = "ColdStartDuration" )
Metric Names - Core Performance
const ( // Federation metrics MetricFederationSuccess = "FederationSuccess" MetricFederationError = "FederationError" MetricFederationLatency = "FederationLatency" MetricInboxMessages = "InboxMessages" MetricOutboxMessages = "OutboxMessages" MetricSignatureVerification = "SignatureVerification" MetricCrawlerBlocked = "CrawlerBlocked" MetricCrawlerBlockedByRoute = "CrawlerBlockedByRoute" MetricCrawlerRateLimited = "CrawlerRateLimited" MetricCrawlerRateLimitedByRoute = "CrawlerRateLimitedByRoute" MetricCrawlerBypassed = "CrawlerBypassed" // Queue metrics MetricQueueDepth = "QueueDepth" MetricQueueDepthCritical = "QueueDepthCritical" MetricQueueDepthWarning = "QueueDepthWarning" MetricQueueDepthHealthy = "QueueDepthHealthy" MetricQueueProcessingTime = "QueueProcessingTime" // Media metrics MetricMediaProcessing = "MediaProcessing" MetricMediaProcessingTime = "MediaProcessingTime" MetricMediaUpload = "MediaUpload" MetricMediaTranscoding = "MediaTranscoding" MetricMediaStorage = "MediaStorage" // User activity metrics MetricPostsPerMinute = "PostsPerMinute" MetricFollowsPerMinute = "FollowsPerMinute" MetricLikesPerMinute = "LikesPerMinute" MetricActiveUsers = "ActiveUsers" MetricDailyActiveUsers = "DailyActiveUsers" )
Metric Names - Business Logic
const ( // Database metrics MetricDynamoReadLatency = "DynamoReadLatency" MetricDynamoWriteLatency = "DynamoWriteLatency" MetricDynamoReadCapacity = "DynamoReadCapacity" MetricDynamoWriteCapacity = "DynamoWriteCapacity" MetricDynamoThrottling = "DynamoThrottling" // Lambda metrics MetricLambdaDuration = "LambdaDuration" MetricLambdaMemoryUsed = "LambdaMemoryUsed" MetricLambdaTimeout = "LambdaTimeout" MetricLambdaConcurrency = "LambdaConcurrency" // Cost metrics MetricCost = "Cost" MetricCostMicrocents = "CostMicrocents" MetricCostPerUser = "CostPerUser" MetricCostPerRequest = "CostPerRequest" // Health metrics MetricSystemHealth = "SystemHealth" MetricComponentHealth = "ComponentHealth" MetricHealthCheck = "HealthCheck" )
Metric Names - Infrastructure
const ( DimensionService = "Service" DimensionOperation = "Operation" DimensionEndpoint = "Endpoint" DimensionMethod = "Method" DimensionStatusCode = "StatusCode" DimensionErrorType = "ErrorType" DimensionEnvironment = "Environment" DimensionRegion = "Region" DimensionInstance = "Instance" DimensionQueue = "Queue" DimensionResource = "Resource" DimensionComponent = "Component" DimensionMediaType = "MediaType" DimensionUserType = "UserType" DimensionCrawlerCategory = "CrawlerCategory" DimensionRouteClass = "RouteClass" )
Dimension Names
const ( // P0 thresholds require immediate attention AlertP0ErrorRatePercent = 10.0 // 10% error rate AlertP0LatencyP99Milliseconds = 5000 // 5 second P99 latency AlertP0QueueDepthMessages = 10000 // 10k messages in queue AlertP0CostDollarsPerHour = 10.0 // $10/hour spend rate AlertP0MemoryUtilizationPercent = 95.0 // 95% memory utilization )
Alert Thresholds - P0 (Critical)
const ( // P1 thresholds require prompt attention AlertP1ErrorRatePercent = 5.0 // 5% error rate AlertP1LatencyP90Milliseconds = 2000 // 2 second P90 latency AlertP1QueueDepthMessages = 1000 // 1k messages in queue AlertP1CostDollarsPerHour = 1.0 // $1/hour spend rate AlertP1MemoryUtilizationPercent = 85.0 // 85% memory utilization AlertP1FederationFailurePercent = 20.0 // 20% federation failures )
Alert Thresholds - P1 (High)
const ( // P2 thresholds for early warning AlertP2ErrorRatePercent = 2.0 // 2% error rate AlertP2LatencyP90Milliseconds = 1000 // 1 second P90 latency AlertP2QueueDepthMessages = 100 // 100 messages in queue AlertP2CostDollarsPerHour = 0.10 // $0.10/hour spend rate AlertP2MemoryUtilizationPercent = 75.0 // 75% memory utilization AlertP2ColdStartsPerMinute = 10 // 10 cold starts per minute )
Alert Thresholds - P2 (Warning)
const ( AlertWindowP0Minutes = 2 // P0 alerts evaluate over 2 minutes AlertWindowP1Minutes = 5 // P1 alerts evaluate over 5 minutes AlertWindowP2Minutes = 10 // P2 alerts evaluate over 10 minutes )
Alert Evaluation Windows (in minutes)
const ( HealthEndpointLive = "/health/live" HealthEndpointReady = "/health/ready" HealthEndpointDetailed = "/health/detailed" )
Health Check Endpoints
const ( HealthStatusHealthy = "healthy" HealthStatusWarning = "warning" HealthStatusCritical = "critical" HealthStatusUnknown = "unknown" )
Health Status Values
const ( UnitSeconds = "Seconds" UnitMilliseconds = "Milliseconds" UnitMicroseconds = "Microseconds" UnitCount = "Count" UnitCountPerSecond = "Count/Second" UnitPercent = "Percent" UnitBytes = "Bytes" UnitKilobytes = "Kilobytes" UnitMegabytes = "Megabytes" UnitGigabytes = "Gigabytes" UnitNone = "None" )
Metric Units (following CloudWatch standards)
const ( TracingSampleRatePercent = 10.0 // Sample 10% of traces MetricsSampleRatePercent = 100.0 // Sample all metrics LogsSampleRatePercent = 100.0 // Sample all logs )
Sampling Configuration
const ( // Maximum overhead targets MaxMetricsOverheadPercent = 1.0 // Max 1% performance overhead MaxBatchSize = 100 // Max metrics per batch MaxFlushIntervalSeconds = 30 // Max time before forced flush // Buffer sizes MetricsBufferSize = 1000 // Max buffered metrics LogsBufferSize = 10000 // Max buffered log entries )
Performance Configuration
const ( RunbookBaseURL = "https://docs.lesser.app/runbooks" RunbookHighErrorRate = RunbookBaseURL + "/high-error-rate" RunbookHighLatency = RunbookBaseURL + "/high-latency" RunbookHighCost = RunbookBaseURL + "/high-cost" RunbookQueueBacklog = RunbookBaseURL + "/queue-backlog" RunbookHealthFailure = RunbookBaseURL + "/health-failure" RunbookSecurityIncident = RunbookBaseURL + "/security-incident" RunbookCapacityIssue = RunbookBaseURL + "/capacity-issue" RunbookFederationIssue = RunbookBaseURL + "/federation-issue" RunbookColdStartIssue = RunbookBaseURL + "/cold-start-issue" )
Runbook URLs
const ( ErrorTypeValidation = "validation" ErrorTypeAuthentication = "authentication" ErrorTypeAuthorization = "authorization" ErrorTypeRateLimit = "rate_limit" ErrorTypeTimeout = "timeout" ErrorTypeInternal = "internal" ErrorTypeDependency = "dependency" ErrorTypeFederation = "federation" ErrorTypeNotFound = "not_found" ErrorTypeConflict = "conflict" )
Error Classifications
const ( TrueString = "true" FalseString = "false" )
Boolean String Constants
const ( AlertSeverityInfo = "info" AlertSeverityWarning = "warning" AlertSeverityError = "error" AlertSeverityCritical = "critical" AlertSeverityUnknown = "unknown" )
Alert severity constants
const (
StatusUnknown = "unknown"
)
Status constants (legacy compatibility)
const (
TrendDirectionStable = "stable"
)
Trend direction constants
Variables ¶
var ( // ErrLoggerRequired indicates that a logger instance is required ErrLoggerRequired = errors.LoggerRequired() // ErrDatabaseRequired indicates that a database connection is required ErrDatabaseRequired = errors.DatabaseRequired() // ErrSNSPublishFailed indicates that publishing to SNS failed ErrSNSPublishFailed = errors.SNSPublishFailed(nil) )
Legacy error variables for backwards compatibility. These are now wrappers around the centralized error system.
var PerformanceTargets = struct { MaxOverheadPercent float64 MaxOperationLatencyMS int64 MaxMemoryOverheadMB int64 MaxColdStartOverheadMS int64 }{ MaxOverheadPercent: MaxMetricsOverheadPercent, MaxOperationLatencyMS: 1, MaxMemoryOverheadMB: 5, MaxColdStartOverheadMS: 200, }
PerformanceTargets defines our performance targets
Functions ¶
func AddErrorToTrace ¶
AddErrorToTrace adds an error to the current X-Ray segment
func AddServiceAnnotations ¶
func AddServiceAnnotations(ctx context.Context, serviceName, serviceVersion string, metadata map[string]interface{})
AddServiceAnnotations adds standard service annotations to X-Ray segment
func CreateEMFPerformanceMonitoringMiddleware ¶
func CreateEMFPerformanceMonitoringMiddleware(emfService *EMFMetricsService) apptheory.Middleware
CreateEMFPerformanceMonitoringMiddleware replaces the polling-based middleware. This is a drop-in replacement for createPerformanceMonitoringMiddleware.
func ExampleLambdaHandler ¶
func ExampleLambdaHandler()
ExampleLambdaHandler demonstrates how to properly use EMF metrics in a Lambda function
func GetLatencyContext ¶
GetLatencyContext extracts latency tracking information from context
func MetricsMiddleware ¶
func MetricsMiddleware(collector *EnhancedMetricsCollector) func(next func()) func()
MetricsMiddleware creates a middleware that records request metrics
func RecordRepositoryLatency ¶
func RecordRepositoryLatency(ctx context.Context, repository, method string, duration time.Duration, success bool, logger *zap.Logger, recorder MetricsRecorder)
RecordRepositoryLatency records latency for repository operations
func TraceDatabaseOperation ¶
func TraceDatabaseOperation(ctx context.Context, operation, tableName string, fn func(ctx context.Context) error) error
TraceDatabaseOperation traces DynamoDB operations
func TraceFederationCall ¶
func TraceFederationCall(ctx context.Context, instance, method, url string, fn func(ctx context.Context) error) error
TraceFederationCall traces federation HTTP calls
func TraceMediaProcessing ¶
func TraceMediaProcessing(ctx context.Context, mediaType, operation string, fn func(ctx context.Context) error) error
TraceMediaProcessing traces media processing operations
func TraceSubsegment ¶
func TraceSubsegment(ctx context.Context, name string, operation func(ctx context.Context) error) error
TraceSubsegment creates a traced subsegment for operations
func ValidateWebhookURL ¶
ValidateWebhookURL validates a webhook URL
func WithLatencyContext ¶
WithLatencyContext adds latency tracking information to context
func WrapLambdaFunc ¶
func WrapLambdaFunc(config *XRayConfig, logger *zap.Logger, handlerFunc interface{}) interface{}
WrapLambdaFunc wraps a Lambda function with X-Ray tracing
func WrapLambdaHandler ¶
func WrapLambdaHandler(config *XRayConfig, logger *zap.Logger, handler lambda.Handler) lambda.Handler
WrapLambdaHandler wraps a Lambda handler with X-Ray tracing
Types ¶
type ActionType ¶
type ActionType int
ActionType represents the type of action to take when an alert fires
const ( // ActionLog represents logging the alert ActionLog ActionType = iota // ActionMetric represents recording a metric for the alert ActionMetric // ActionWebhook represents sending a webhook for the alert ActionWebhook // ActionEmail represents sending an email for the alert ActionEmail // ActionSlack represents sending a Slack message for the alert ActionSlack )
func (ActionType) String ¶
func (a ActionType) String() string
type Alert ¶
type Alert struct {
RuleName string `json:"rule_name"`
Service string `json:"service"`
Operation string `json:"operation"`
Severity AlertSeverity `json:"severity"`
State AlertState `json:"state"`
Message string `json:"message"`
Timestamp time.Time `json:"timestamp"`
Values map[string]float64 `json:"values"`
Dimensions map[string]string `json:"dimensions"`
Actions []AlertAction `json:"actions"`
Context map[string]interface{} `json:"context"`
}
Alert represents a triggered alert
type AlertAction ¶
type AlertAction struct {
Type ActionType `json:"type"`
Config map[string]interface{} `json:"config"`
Enabled bool `json:"enabled"`
}
AlertAction defines what to do when an alert is triggered
type AlertHistory ¶
type AlertHistory struct {
RuleName string `json:"rule_name"`
LastFired time.Time `json:"last_fired"`
LastResolved time.Time `json:"last_resolved"`
FireCount int `json:"fire_count"`
CurrentState AlertState `json:"current_state"`
LastLatency float64 `json:"last_latency_ms"`
LastP95 float64 `json:"last_p95_ms"`
LastP99 float64 `json:"last_p99_ms"`
ConsecutiveFires int `json:"consecutive_fires"`
}
AlertHistory tracks alert firing history
type AlertRequest ¶
type AlertRequest struct {
Type string `json:"type"`
Severity string `json:"severity"`
Priority string `json:"priority,omitempty"`
Title string `json:"title"`
Description string `json:"description"`
Message string `json:"message,omitempty"`
Service string `json:"service"`
Region string `json:"region"`
Source string `json:"source"`
RunbookURL string `json:"runbook_url,omitempty"`
Dimensions map[string]string `json:"dimensions,omitempty"`
Metadata map[string]interface{} `json:"metadata,omitempty"`
Values map[string]float64 `json:"values,omitempty"`
Thresholds map[string]float64 `json:"thresholds,omitempty"`
}
AlertRequest represents a request to send an alert
type AlertRoute ¶
type AlertRoute struct {
Name string `json:"name"`
Description string `json:"description"`
Enabled bool `json:"enabled"`
// Matching criteria
AlertTypes []string `json:"alert_types,omitempty"` // error_rate, latency, cost, etc.
SeverityLevels []string `json:"severity_levels,omitempty"` // critical, error, warning, info
Services []string `json:"services,omitempty"` // api, federation, etc.
Priorities []string `json:"priorities,omitempty"` // P0, P1, P2, P3
// Delivery configuration
WebhookURLs []string `json:"webhook_urls"`
SNSTopicARNs []string `json:"sns_topic_arns,omitempty"`
EmailAddresses []string `json:"email_addresses,omitempty"`
SlackChannels []string `json:"slack_channels,omitempty"`
// Timing and throttling
Throttle *ThrottleConfig `json:"throttle,omitempty"`
Schedule *ScheduleConfig `json:"schedule,omitempty"`
EscalationRules []*EscalationRule `json:"escalation_rules,omitempty"`
}
AlertRoute defines how alerts should be routed based on criteria
type AlertRule ¶
type AlertRule struct {
Name string `json:"name"`
Operation string `json:"operation"`
Service string `json:"service"`
Threshold float64 `json:"threshold_ms"`
P95Threshold float64 `json:"p95_threshold_ms"`
P99Threshold float64 `json:"p99_threshold_ms"`
WindowSize time.Duration `json:"window_size"`
MinDataPoints int `json:"min_data_points"`
AlertCooldown time.Duration `json:"alert_cooldown"`
Severity AlertSeverity `json:"severity"`
Enabled bool `json:"enabled"`
Conditions []Condition `json:"conditions"`
Actions []AlertAction `json:"actions"`
}
AlertRule defines the conditions for triggering an alert
type AlertSeverity ¶
type AlertSeverity int
AlertSeverity represents the severity level of alerts
const (
// SeverityInfo represents informational alerts
SeverityInfo AlertSeverity = iota
// SeverityWarning represents warning level alerts
SeverityWarning
// SeverityError represents error level alerts
SeverityError
// SeverityCritical represents critical level alerts
SeverityCritical
)
func (AlertSeverity) String ¶
func (s AlertSeverity) String() string
type AlertState ¶
type AlertState int
AlertState represents the current state of an alert
const (
// StateNormal represents the normal (not firing) state
StateNormal AlertState = iota
// StateFiring represents an actively firing alert
StateFiring
// StateResolved represents a resolved alert
StateResolved
)
func (AlertState) String ¶
func (s AlertState) String() string
type AlertStats ¶
type AlertStats struct {
TotalAlerts int64
ActiveAlerts int64
ResolvedAlerts int64
CriticalAlerts int64
WarningAlerts int64
InfoAlerts int64
DeliverySuccessRate float64
AverageResponseTime time.Duration
}
AlertStats represents alert statistics
type AlertSummary ¶
type AlertSummary struct {
AlertID string `json:"alert_id"`
Type string `json:"type"`
Severity string `json:"severity"`
Priority string `json:"priority"`
Status string `json:"status"`
Title string `json:"title"`
Service string `json:"service"`
FiredAt time.Time `json:"fired_at"`
RunbookURL string `json:"runbook_url,omitempty"`
}
AlertSummary provides a summary view of an alert
type AlertingConfig ¶
type AlertingConfig struct {
Logger *zap.Logger
DB core.DB
TableName string
CostService *cost.TrackingService
SNSClient snsPublishAPI
SNSTopicArn string
WebhookURL string
WebhookHeaders map[string]string
Environment string
Region string
ServiceName string
Enabled bool
}
AlertingConfig contains configuration for the alerting system
type AlertingSystem ¶
type AlertingSystem struct {
// contains filtered or unexported fields
}
AlertingSystem provides comprehensive alerting with webhook delivery and SNS integration
func NewAlertingSystem ¶
func NewAlertingSystem(config *AlertingConfig) (*AlertingSystem, error)
NewAlertingSystem creates a new comprehensive alerting system
func (*AlertingSystem) CheckCost ¶
func (a *AlertingSystem) CheckCost(ctx context.Context, costMicroCents float64)
CheckCost triggers a cost alert if the threshold is exceeded
func (*AlertingSystem) CheckErrorRate ¶
func (a *AlertingSystem) CheckErrorRate(ctx context.Context, service string, errorRate float64)
CheckErrorRate triggers an error rate alert if the threshold is exceeded
func (*AlertingSystem) CheckHealth ¶
func (a *AlertingSystem) CheckHealth(ctx context.Context, service string, isHealthy bool, errorMsg string)
CheckHealth triggers a health alert for service issues
func (*AlertingSystem) CheckLatency ¶
func (a *AlertingSystem) CheckLatency(ctx context.Context, service string, operation string, latencyMs, p95Ms, p99Ms float64)
CheckLatency triggers a latency alert if a threshold is exceeded
func (*AlertingSystem) CheckSecurity ¶
func (a *AlertingSystem) CheckSecurity(ctx context.Context, eventType string, severity string, details map[string]interface{})
CheckSecurity triggers security alerts
func (*AlertingSystem) Cleanup ¶
func (a *AlertingSystem) Cleanup(ctx context.Context) error
Cleanup removes old alerts and deliveries
func (*AlertingSystem) GetActiveAlerts ¶
GetActiveAlerts retrieves currently active alerts
func (*AlertingSystem) GetAlertStats ¶
func (a *AlertingSystem) GetAlertStats(_ context.Context, _ time.Time) (*AlertStats, error)
GetAlertStats retrieves alert statistics
func (*AlertingSystem) ProcessRetries ¶
func (a *AlertingSystem) ProcessRetries(ctx context.Context) error
ProcessRetries processes failed webhook deliveries
func (*AlertingSystem) ResolveAlert ¶
func (a *AlertingSystem) ResolveAlert(ctx context.Context, alertID string) error
ResolveAlert resolves an active alert
func (*AlertingSystem) SendAlert ¶
func (a *AlertingSystem) SendAlert(ctx context.Context, alertReq *AlertRequest) error
SendAlert sends an alert through all configured channels
type AnnotationConfig ¶
type AnnotationConfig struct {
Horizontal []HorizontalAnnotation `json:"horizontal,omitempty"`
Vertical []VerticalAnnotation `json:"vertical,omitempty"`
}
AnnotationConfig configures annotations and alarms
type AxisConfig ¶
type AxisConfig struct {
Min float64 `json:"min,omitempty"`
Max float64 `json:"max,omitempty"`
Label string `json:"label,omitempty"`
}
AxisConfig configures an individual axis
type CacheMetrics ¶
type CacheMetrics struct {
HitRate float64 `json:"hit_rate"`
MissRate float64 `json:"miss_rate"`
TotalHits int64 `json:"total_hits"`
TotalMisses int64 `json:"total_misses"`
EvictionRate float64 `json:"eviction_rate"`
}
CacheMetrics represents cache performance metrics
type CapacityMetrics ¶
type CapacityMetrics struct {
ReadCapacityUnits float64 `json:"read_capacity_units"`
WriteCapacityUnits float64 `json:"write_capacity_units"`
ReadThrottles int64 `json:"read_throttles"`
WriteThrottles int64 `json:"write_throttles"`
ConsumedReadCapacity float64 `json:"consumed_read_capacity"`
ConsumedWriteCapacity float64 `json:"consumed_write_capacity"`
}
CapacityMetrics represents DynamoDB capacity consumption
type Condition ¶
type Condition struct {
Type ConditionType `json:"type"` // "latency", "error_rate", "throughput"
Operator string `json:"operator"` // ">", "<", ">=", "<=", "=="
Value float64 `json:"value"` // Threshold value
Percentile string `json:"percentile,omitempty"` // "p50", "p95", "p99"
}
Condition represents a condition that must be met for an alert
type ConditionType ¶
type ConditionType int
ConditionType represents the type of alert condition
const (
// ConditionLatency represents latency-based alerting
ConditionLatency ConditionType = iota
// ConditionErrorRate represents error rate based alerting
ConditionErrorRate
// ConditionThroughput represents throughput based alerting
ConditionThroughput
)
func (ConditionType) String ¶
func (c ConditionType) String() string
type DashboardConfig ¶
type DashboardConfig struct {
Name string `json:"name"`
Description string `json:"description"`
Widgets []DashboardWidget `json:"widgets"`
Period int `json:"period"`
Region string `json:"region"`
Metadata map[string]interface{} `json:"metadata,omitempty"`
}
DashboardConfig represents a complete CloudWatch dashboard configuration
func CreateAPIPerformanceDashboard ¶
func CreateAPIPerformanceDashboard(region, environment string) *DashboardConfig
CreateAPIPerformanceDashboard creates a detailed API performance dashboard
func CreateFederationDashboard ¶
func CreateFederationDashboard(region, environment string) *DashboardConfig
CreateFederationDashboard creates a dashboard focused on ActivityPub federation
func CreateLesserOverviewDashboard ¶
func CreateLesserOverviewDashboard(region, environment string) *DashboardConfig
CreateLesserOverviewDashboard creates the main overview dashboard for Lesser
func CreateMediaProcessingDashboard ¶
func CreateMediaProcessingDashboard(region, environment string) *DashboardConfig
CreateMediaProcessingDashboard creates a dashboard for media processing metrics
func GetAllDashboards ¶
func GetAllDashboards(region, environment string) []*DashboardConfig
GetAllDashboards returns all available dashboard configurations
func (*DashboardConfig) ToJSON ¶
func (dc *DashboardConfig) ToJSON() (string, error)
ToJSON converts dashboard config to JSON string
type DashboardWidget ¶
type DashboardWidget struct {
Type string `json:"type"`
X int `json:"x"`
Y int `json:"y"`
Width int `json:"width"`
Height int `json:"height"`
Properties DashboardWidgetProps `json:"properties"`
Metadata map[string]interface{} `json:"metadata,omitempty"`
}
DashboardWidget represents a single widget in a dashboard
type DashboardWidgetProps ¶
type DashboardWidgetProps struct {
Title string `json:"title"`
View string `json:"view,omitempty"`
Stacked bool `json:"stacked,omitempty"`
Region string `json:"region"`
Period int `json:"period,omitempty"`
Stat string `json:"stat,omitempty"`
YAxis *YAxisConfig `json:"yAxis,omitempty"`
Metrics [][]interface{} `json:"metrics"`
Annotations *AnnotationConfig `json:"annotations,omitempty"`
}
DashboardWidgetProps contains widget-specific properties
type DefaultMetricsRecorder ¶
type DefaultMetricsRecorder struct {
// contains filtered or unexported fields
}
DefaultMetricsRecorder implements MetricsRecorder using the MetricRecord repository pattern
func NewDefaultMetricsRecorder ¶
func NewDefaultMetricsRecorder(createMetricFn func(ctx context.Context, metric *models.MetricRecord) error, serviceName string) *DefaultMetricsRecorder
NewDefaultMetricsRecorder creates a default metrics recorder
type DynamORMMetrics ¶
type DynamORMMetrics struct {
// contains filtered or unexported fields
}
DynamORMMetrics provides pre-configured metrics tracking for common patterns
func NewDynamORMMetrics ¶
func NewDynamORMMetrics(db core.DB, tableName string, logger *zap.Logger, recorder MetricsRecorder) *DynamORMMetrics
NewDynamORMMetrics creates pre-configured DynamORM metrics tracker
func (*DynamORMMetrics) TrackBatch ¶
func (dm *DynamORMMetrics) TrackBatch(ctx context.Context, repository string, operation string, count int, fn func() error) error
TrackBatch tracks a batch operation
func (*DynamORMMetrics) TrackCreate ¶
TrackCreate tracks a create operation
func (*DynamORMMetrics) TrackDelete ¶
TrackDelete tracks a delete operation
func (*DynamORMMetrics) TrackQuery ¶
func (dm *DynamORMMetrics) TrackQuery(ctx context.Context, repository, method string, fn func() error) error
TrackQuery tracks a query operation
func (*DynamORMMetrics) TrackRepositoryMethod ¶
func (dm *DynamORMMetrics) TrackRepositoryMethod(ctx context.Context, repository, method string, fn func() error) error
TrackRepositoryMethod tracks a repository method execution
func (*DynamORMMetrics) TrackUpdate ¶
TrackUpdate tracks an update operation
type DynamORMTracker ¶
type DynamORMTracker struct {
// contains filtered or unexported fields
}
DynamORMTracker wraps DynamORM operations with latency tracking
func NewDynamORMTracker ¶
func NewDynamORMTracker(db core.DB, logger *zap.Logger, recorder MetricsRecorder) *DynamORMTracker
NewDynamORMTracker creates a new DynamORM operation tracker
func (*DynamORMTracker) TrackBatch ¶
func (t *DynamORMTracker) TrackBatch(ctx context.Context, operation string, table string, count int, batchFn func() error) error
TrackBatch wraps a DynamORM batch operation with latency tracking
func (*DynamORMTracker) TrackCreate ¶
func (t *DynamORMTracker) TrackCreate(ctx context.Context, table string, createFn func() error) error
TrackCreate wraps a DynamORM create operation with latency tracking
func (*DynamORMTracker) TrackDelete ¶
func (t *DynamORMTracker) TrackDelete(ctx context.Context, table string, deleteFn func() error) error
TrackDelete wraps a DynamORM delete operation with latency tracking
func (*DynamORMTracker) TrackQuery ¶
func (t *DynamORMTracker) TrackQuery(ctx context.Context, operation string, table string, queryFn func() error) error
TrackQuery wraps a DynamORM query operation with latency tracking
func (*DynamORMTracker) TrackUpdate ¶
func (t *DynamORMTracker) TrackUpdate(ctx context.Context, table string, updateFn func() error) error
TrackUpdate wraps a DynamORM update operation with latency tracking
type EMFBuffer ¶
type EMFBuffer struct {
// contains filtered or unexported fields
}
EMFBuffer holds metrics before flushing - no background goroutines
func (*EMFBuffer) GetAndClear ¶
GetAndClear returns all metrics and clears the buffer (thread-safe)
func (*EMFBuffer) ShouldFlush ¶
ShouldFlush returns true if the buffer should be flushed
type EMFCloudWatchMetrics ¶
type EMFCloudWatchMetrics struct {
Namespace string `json:"Namespace"`
Dimensions [][]string `json:"Dimensions"`
Metrics []EMFMetricDefinition `json:"Metrics"`
}
EMFCloudWatchMetrics defines the metrics structure for CloudWatch
type EMFDimension ¶
EMFDimension represents metric dimensions
type EMFLog ¶
type EMFLog struct {
AWS EMFMetadata `json:"_aws"`
Timestamp int64 `json:"Timestamp,omitempty"`
Dimensions map[string]string `json:",inline"`
Metrics map[string]interface{} `json:",inline"`
}
EMFLog represents the complete EMF log structure written to stdout
type EMFLogEntry ¶
type EMFLogEntry struct {
AWS EMFMetadata `json:"_aws"`
Timestamp int64 `json:"timestamp,omitempty"`
Message string `json:"message,omitempty"`
Level string `json:"level,omitempty"`
Service string `json:"service"`
Namespace string `json:"namespace"`
Dimensions map[string]string `json:"dimensions,omitempty"`
Metrics map[string]interface{} `json:"metrics"`
Properties map[string]interface{} `json:"properties,omitempty"`
}
EMFLogEntry represents the EMF log entry structure that CloudWatch expects
type EMFMetadata ¶
type EMFMetadata struct {
Timestamp int64 `json:"Timestamp"`
LogGroup string `json:"LogGroup,omitempty"`
LogStream string `json:"LogStream,omitempty"`
CloudWatchMetrics []EMFCloudWatchMetrics `json:"CloudWatchMetrics"`
}
EMFMetadata contains EMF-specific metadata
type EMFMetric ¶
type EMFMetric struct {
Name string
Value float64
Unit string
Dimensions map[string]string
Timestamp int64
}
EMFMetric represents a metric in EMF format
type EMFMetricData ¶
type EMFMetricData struct {
MetricName string `json:"metric_name"`
Value float64 `json:"value"`
Unit string `json:"unit"`
Timestamp int64 `json:"timestamp"`
}
EMFMetricData represents a single EMF metric
type EMFMetricDefinition ¶
EMFMetricDefinition defines a single metric
type EMFMetrics ¶
type EMFMetrics struct {
// contains filtered or unexported fields
}
EMFMetrics handles CloudWatch EMF metric emission
func NewEMFMetrics ¶
func NewEMFMetrics(logger *zap.Logger, namespace, service string) *EMFMetrics
NewEMFMetrics creates a new EMF metrics collector
func (*EMFMetrics) AddDimension ¶
func (emf *EMFMetrics) AddDimension(name, value string)
AddDimension adds a dimension that will be applied to all metrics
func (*EMFMetrics) Flush ¶
func (emf *EMFMetrics) Flush()
Flush emits all accumulated metrics to CloudWatch via structured logs
func (*EMFMetrics) IsEnabled ¶
func (emf *EMFMetrics) IsEnabled() bool
IsEnabled returns whether EMF metrics are enabled
func (*EMFMetrics) PutMetric ¶
func (emf *EMFMetrics) PutMetric(name string, value float64, unit string, dimensions map[string]string)
PutMetric records a metric value
func (*EMFMetrics) RecordBusinessMetric ¶
func (emf *EMFMetrics) RecordBusinessMetric(metricName string, value float64, unit string, dimensions map[string]string)
RecordBusinessMetric records business-specific metrics
func (*EMFMetrics) RecordConcurrency ¶
func (emf *EMFMetrics) RecordConcurrency(operation string, activeCount int64)
RecordConcurrency records concurrency metrics
func (*EMFMetrics) RecordCost ¶
func (emf *EMFMetrics) RecordCost(operation string, costUSD float64)
RecordCost records cost metrics
func (*EMFMetrics) RecordError ¶
func (emf *EMFMetrics) RecordError(operation string, errorType string)
RecordError records error metrics
func (*EMFMetrics) RecordFederationMetric ¶
func (emf *EMFMetrics) RecordFederationMetric(operation string, instance string, success bool, latencyMs float64)
RecordFederationMetric records federation-specific metrics
func (*EMFMetrics) RecordLatency ¶
func (emf *EMFMetrics) RecordLatency(operation string, duration time.Duration)
RecordLatency records latency metrics with enhanced percentile tracking
func (*EMFMetrics) RecordQueueDepth ¶
func (emf *EMFMetrics) RecordQueueDepth(queueName string, depth int64)
RecordQueueDepth records queue depth metrics
func (*EMFMetrics) RecordSuccess ¶
func (emf *EMFMetrics) RecordSuccess(operation string)
RecordSuccess records successful operations
func (*EMFMetrics) RecordThroughput ¶
func (emf *EMFMetrics) RecordThroughput(operation string, count int64)
RecordThroughput records throughput metrics
func (*EMFMetrics) SetEnabled ¶
func (emf *EMFMetrics) SetEnabled(enabled bool)
SetEnabled enables or disables EMF metrics
func (*EMFMetrics) SetProperty ¶
func (emf *EMFMetrics) SetProperty(key string, value interface{})
SetProperty adds a property to the EMF output
func (*EMFMetrics) StartLatencyTimer ¶
func (emf *EMFMetrics) StartLatencyTimer(ctx context.Context, operation string) *LatencyMetric
StartLatencyTimer starts tracking latency for an operation
type EMFMetricsCollector ¶
type EMFMetricsCollector struct {
// contains filtered or unexported fields
}
EMFMetricsCollector implements CloudWatch Embedded Metrics Format for serverless environments. It eliminates polling patterns and writes metrics directly to stdout for Lambda integration.
func NewEMFMetricsCollector ¶
func NewEMFMetricsCollector(namespace string, logger *zap.Logger) *EMFMetricsCollector
NewEMFMetricsCollector creates a new EMF-based metrics collector optimized for Lambda
func (*EMFMetricsCollector) Flush ¶
func (emc *EMFMetricsCollector) Flush() error
Flush writes all buffered metrics to stdout in EMF format. This method is synchronous and safe to call from Lambda handlers.
func (*EMFMetricsCollector) GetBufferSize ¶
func (emc *EMFMetricsCollector) GetBufferSize() int
GetBufferSize returns the current buffer size (for monitoring)
func (*EMFMetricsCollector) RecordCost ¶
func (emc *EMFMetricsCollector) RecordCost(operation string, costUSD float64)
RecordCost records operation cost in USD
func (*EMFMetricsCollector) RecordErrorRate ¶
func (emc *EMFMetricsCollector) RecordErrorRate(operation string, errorCount, totalCount int64)
RecordErrorRate records error rate as a percentage
func (*EMFMetricsCollector) RecordLatency ¶
func (emc *EMFMetricsCollector) RecordLatency(operation string, duration time.Duration)
RecordLatency records operation latency - optimized for Lambda use
func (*EMFMetricsCollector) RecordMetric ¶
func (emc *EMFMetricsCollector) RecordMetric(name string, value float64, unit types.StandardUnit, dimensions ...types.Dimension)
RecordMetric records a custom metric with optional additional dimensions. This is thread-safe and does not use background goroutines.
func (*EMFMetricsCollector) RecordPerformanceMetrics ¶
func (emc *EMFMetricsCollector) RecordPerformanceMetrics(metrics *PerformanceMetrics)
RecordPerformanceMetrics records Lambda runtime performance metrics
func (*EMFMetricsCollector) RecordThroughput ¶
func (emc *EMFMetricsCollector) RecordThroughput(operation string, count int64)
RecordThroughput records operation throughput
func (*EMFMetricsCollector) RemoveDimension ¶
func (emc *EMFMetricsCollector) RemoveDimension(name string)
RemoveDimension removes a default dimension
func (*EMFMetricsCollector) SetDimension ¶
func (emc *EMFMetricsCollector) SetDimension(name, value string)
SetDimension adds or updates a default dimension
func (*EMFMetricsCollector) Stop ¶
func (emc *EMFMetricsCollector) Stop()
Stop is a no-op for the EMF collector since there are no background goroutines. This maintains interface compatibility with the polling-based collectors.
type EMFMetricsService ¶
type EMFMetricsService struct {
// contains filtered or unexported fields
}
EMFMetricsService demonstrates proper EMF metrics integration in a Lambda environment
func ConvertPollingMetricsToEMF ¶
func ConvertPollingMetricsToEMF(oldCollector *MetricsCollector, logger *zap.Logger) *EMFMetricsService
ConvertPollingMetricsToEMF provides a helper to migrate from polling-based metrics
func NewEMFMetricsService ¶
func NewEMFMetricsService(logger *zap.Logger) *EMFMetricsService
NewEMFMetricsService creates a new EMF metrics service for Lambda
func (*EMFMetricsService) FlushMetrics ¶
func (ems *EMFMetricsService) FlushMetrics() error
FlushMetrics manually flushes all metrics - call this at the end of Lambda execution
func (*EMFMetricsService) RecordBusinessMetrics ¶
func (ems *EMFMetricsService) RecordBusinessMetrics(metricName string, value float64, unit string, businessContext map[string]string)
RecordBusinessMetrics records application-specific business metrics
func (*EMFMetricsService) RecordDynamoDBMetrics ¶
func (ems *EMFMetricsService) RecordDynamoDBMetrics(operation, tableName string, duration time.Duration, readUnits, writeUnits float64, err error)
RecordDynamoDBMetrics records DynamoDB operation metrics using EMF
func (*EMFMetricsService) RecordRequestMetrics ¶
func (ems *EMFMetricsService) RecordRequestMetrics(ctx *apptheory.Context, perfMetrics *PerformanceMetrics, statusCode int, requestErr error)
RecordRequestMetrics records comprehensive request metrics using EMF
func (*EMFMetricsService) Stop ¶
func (ems *EMFMetricsService) Stop()
Stop gracefully stops the metrics service (no-op for serverless EMF, but maintains interface compatibility)
type EMFPayload ¶
type EMFPayload struct {
Version string `json:"_aws"`
Timestamp int64 `json:"timestamp"`
LogGroup string `json:"log_group,omitempty"`
LogStream string `json:"log_stream,omitempty"`
Namespace string `json:"namespace"`
Dimensions [][]string `json:"dimensions"`
Metrics []EMFMetricData `json:"metrics"`
Properties map[string]interface{} `json:"properties,omitempty"`
}
EMFPayload represents the complete EMF payload structure
type EnhancedMetric ¶
type EnhancedMetric struct {
Name string `json:"name"`
Type MetricType `json:"type"`
Level MetricLevel `json:"level"`
Value float64 `json:"value"`
Unit string `json:"unit"`
Description string `json:"description"`
Labels map[string]string `json:"labels"`
Timestamp time.Time `json:"timestamp"`
Metadata map[string]interface{} `json:"metadata,omitempty"`
}
EnhancedMetric represents a comprehensive metric with metadata
type EnhancedMetricsCollector ¶
type EnhancedMetricsCollector struct {
// contains filtered or unexported fields
}
EnhancedMetricsCollector collects and aggregates metrics
func GetGlobalMetricsCollector ¶
func GetGlobalMetricsCollector(logger *zap.Logger) *EnhancedMetricsCollector
GetGlobalMetricsCollector returns the global metrics collector
func NewEnhancedMetricsCollector ¶
func NewEnhancedMetricsCollector(logger *zap.Logger) *EnhancedMetricsCollector
NewEnhancedMetricsCollector creates a new enhanced metrics collector
func (*EnhancedMetricsCollector) GetCurrentMetrics ¶
func (c *EnhancedMetricsCollector) GetCurrentMetrics() map[string]*EnhancedMetric
GetCurrentMetrics returns all current metrics
func (*EnhancedMetricsCollector) GetErrorRates ¶
func (c *EnhancedMetricsCollector) GetErrorRates() ErrorRateMetric
GetErrorRates calculates error rates
func (*EnhancedMetricsCollector) GetLatencyPercentiles ¶
func (c *EnhancedMetricsCollector) GetLatencyPercentiles() PercentileMetric
GetLatencyPercentiles calculates latency percentiles
func (*EnhancedMetricsCollector) GetMetricsJSON ¶
func (c *EnhancedMetricsCollector) GetMetricsJSON() ([]byte, error)
GetMetricsJSON returns metrics as JSON
func (*EnhancedMetricsCollector) MarkFlushed ¶
func (c *EnhancedMetricsCollector) MarkFlushed()
MarkFlushed marks metrics as flushed
func (*EnhancedMetricsCollector) RecordCacheHit ¶
func (c *EnhancedMetricsCollector) RecordCacheHit(hit bool, cacheType string)
RecordCacheHit records cache hit/miss
func (*EnhancedMetricsCollector) RecordDynamoDBCapacity ¶
func (c *EnhancedMetricsCollector) RecordDynamoDBCapacity(operation string, consumedRCU, consumedWCU float64)
RecordDynamoDBCapacity records DynamoDB capacity consumption
func (*EnhancedMetricsCollector) RecordError ¶
func (c *EnhancedMetricsCollector) RecordError(errorType string, statusCode int)
RecordError records error occurrences
func (*EnhancedMetricsCollector) RecordLatency ¶
func (c *EnhancedMetricsCollector) RecordLatency(operation string, latency time.Duration)
RecordLatency records request latency
func (*EnhancedMetricsCollector) Reset ¶
func (c *EnhancedMetricsCollector) Reset()
Reset clears all metrics (call after flushing to persistent storage)
func (*EnhancedMetricsCollector) ShouldFlush ¶
func (c *EnhancedMetricsCollector) ShouldFlush() bool
ShouldFlush returns true if metrics should be flushed to storage
type ErrorRateMetric ¶
type ErrorRateMetric struct {
Total int64 `json:"total_requests"`
Errors int64 `json:"error_count"`
ErrorRate float64 `json:"error_rate"`
By4xx int64 `json:"4xx_errors"`
By5xx int64 `json:"5xx_errors"`
ByType map[string]int64 `json:"errors_by_type"`
}
ErrorRateMetric represents error rate metrics
type EscalationRule ¶
type EscalationRule struct {
Level int `json:"level"` // 1, 2, 3, etc.
TriggerAfter time.Duration `json:"trigger_after"` // Escalate after this duration
AdditionalChannels []string `json:"additional_channels"` // Additional delivery channels
RequiresManualAck bool `json:"requires_manual_ack"` // Requires manual acknowledgment
}
EscalationRule defines alert escalation behavior
type HTTPLatencyTracker ¶
type HTTPLatencyTracker struct {
// contains filtered or unexported fields
}
HTTPLatencyTracker provides a simple interface for tracking HTTP latencies
func NewHTTPLatencyTracker ¶
func NewHTTPLatencyTracker(recorder MetricsRecorder, serviceName string, logger *zap.Logger) *HTTPLatencyTracker
NewHTTPLatencyTracker creates a simple HTTP latency tracker
func (*HTTPLatencyTracker) TrackRequest ¶
func (hlt *HTTPLatencyTracker) TrackRequest(ctx context.Context, method, url string, statusCode int, duration time.Duration)
TrackRequest tracks an HTTP request latency
type HTTPMetrics ¶
type HTTPMetrics struct {
URL string
Method string
StatusCode int
RequestSize int64
ResponseSize int64
DNSTime time.Duration
TCPTime time.Duration
TLSTime time.Duration
FirstByteTime time.Duration
TotalTime time.Duration
Success bool
ErrorType string
RetryAttempts int
}
HTTPMetrics represents detailed HTTP call metrics
type HTTPTracker ¶
type HTTPTracker struct {
// contains filtered or unexported fields
}
HTTPTracker wraps HTTP client operations with comprehensive latency tracking
func NewHTTPTracker ¶
func NewHTTPTracker(client *http.Client, logger *zap.Logger, recorder MetricsRecorder, serviceName string) *HTTPTracker
NewHTTPTracker creates a new HTTPTracker that wraps the given HTTP client with latency tracking
func (*HTTPTracker) Do ¶
func (ht *HTTPTracker) Do(ctx context.Context, req *http.Request) (*http.Response, *HTTPMetrics, error)
Do executes an HTTP request with comprehensive tracking
func (*HTTPTracker) DoFederation ¶
func (ht *HTTPTracker) DoFederation(ctx context.Context, req *http.Request, targetInstance string) (*http.Response, *HTTPMetrics, error)
DoFederation executes a federation HTTP request with specialized tracking
type HealthCheck ¶
type HealthCheck struct {
Name string `json:"name"`
Status string `json:"status"`
Message string `json:"message,omitempty"`
LastCheck time.Time `json:"last_check"`
Duration time.Duration `json:"duration"`
Metadata map[string]interface{} `json:"metadata,omitempty"`
}
HealthCheck represents a health check result
type HealthChecker ¶
type HealthChecker struct {
// contains filtered or unexported fields
}
HealthChecker manages health checks for various components
func NewHealthChecker ¶
func NewHealthChecker(logger *zap.Logger, cfg aws.Config, service, version string, healthConfig *HealthConfig) *HealthChecker
NewHealthChecker creates a new health checker
func (*HealthChecker) DetailedHandler ¶
func (hc *HealthChecker) DetailedHandler(w http.ResponseWriter, r *http.Request)
DetailedHandler implements GET /health/detailed endpoint
func (*HealthChecker) LivenessHandler ¶
func (hc *HealthChecker) LivenessHandler(w http.ResponseWriter, _ *http.Request)
LivenessHandler implements GET /health/live endpoint
func (*HealthChecker) ReadinessHandler ¶
func (hc *HealthChecker) ReadinessHandler(w http.ResponseWriter, r *http.Request)
ReadinessHandler implements GET /health/ready endpoint
func (*HealthChecker) RegisterHealthRoutes ¶
func (hc *HealthChecker) RegisterHealthRoutes(mux *http.ServeMux)
RegisterHealthRoutes registers health check routes with an HTTP mux
type HealthConfig ¶
type HealthConfig struct {
TableName string
QueueURL string
CheckTimeout time.Duration
CacheTimeout time.Duration
DependencyChecks bool
}
HealthConfig contains configuration for health checks
type HealthResponse ¶
type HealthResponse struct {
Status string `json:"status"`
Timestamp time.Time `json:"timestamp"`
Version string `json:"version"`
Service string `json:"service"`
Region string `json:"region"`
Checks []HealthCheck `json:"checks,omitempty"`
Summary map[string]interface{} `json:"summary,omitempty"`
}
HealthResponse represents the overall health response
type HistoricalMetricsReader ¶
type HistoricalMetricsReader interface {
GetMetricsByService(ctx context.Context, serviceName string, startTime, endTime time.Time) ([]*models.MetricRecord, error)
}
HistoricalMetricsReader provides access to historical metrics data
type HorizontalAnnotation ¶
type HorizontalAnnotation struct {
Value float64 `json:"value"`
Label string `json:"label,omitempty"`
Color string `json:"color,omitempty"`
Fill string `json:"fill,omitempty"`
Visible bool `json:"visible,omitempty"`
}
HorizontalAnnotation represents a horizontal line annotation
type LatencyAggregator ¶
type LatencyAggregator struct {
// contains filtered or unexported fields
}
LatencyAggregator handles real-time latency aggregation and percentile calculations
func NewLatencyAggregator ¶
func NewLatencyAggregator(logger *zap.Logger, recorder MetricsRecorder, options ...LatencyAggregatorOption) *LatencyAggregator
NewLatencyAggregator creates a new latency aggregator
func (*LatencyAggregator) GetAggregatedStats ¶
func (la *LatencyAggregator) GetAggregatedStats(service string, timeWindow time.Duration) (map[string]*LatencyStats, error)
GetAggregatedStats returns aggregated statistics for multiple operations
func (*LatencyAggregator) GetCurrentStats ¶
func (la *LatencyAggregator) GetCurrentStats(operation, service string) (*LatencyStats, error)
GetCurrentStats returns current latency statistics for an operation
func (*LatencyAggregator) GetLatencyTrend ¶
func (la *LatencyAggregator) GetLatencyTrend(ctx context.Context, operation, service string, startTime, endTime time.Time, interval time.Duration) (*LatencyTrend, error)
GetLatencyTrend returns latency trend analysis over a time period
func (*LatencyAggregator) RecordLatency ¶
func (la *LatencyAggregator) RecordLatency(operation, service string, duration time.Duration)
RecordLatency records a latency measurement
func (*LatencyAggregator) Start ¶
func (la *LatencyAggregator) Start()
Start begins the aggregation process
func (*LatencyAggregator) Stop ¶
func (la *LatencyAggregator) Stop()
Stop stops the aggregation process
type LatencyAggregatorOption ¶
type LatencyAggregatorOption func(*LatencyAggregator)
LatencyAggregatorOption configures the latency aggregator
func WithAggregateInterval ¶
func WithAggregateInterval(interval time.Duration) LatencyAggregatorOption
WithAggregateInterval sets the aggregation window interval
func WithCloudWatch ¶
func WithCloudWatch(_ interface{}, namespace string) LatencyAggregatorOption
WithCloudWatch configures CloudWatch integration
func WithMaxBuckets ¶
func WithMaxBuckets(maxBuckets int) LatencyAggregatorOption
WithMaxBuckets sets the maximum number of buckets to keep
func WithMetricsRepository ¶
func WithMetricsRepository(reader HistoricalMetricsReader) LatencyAggregatorOption
WithMetricsRepository sets the historical metrics repository
func WithRetentionPeriod ¶
func WithRetentionPeriod(period time.Duration) LatencyAggregatorOption
WithRetentionPeriod sets how long to keep data in memory
type LatencyAlerter ¶
type LatencyAlerter struct {
// contains filtered or unexported fields
}
LatencyAlerter manages real-time latency alerting
func NewLatencyAlerter ¶
func NewLatencyAlerter(logger *zap.Logger, recorder MetricsRecorder, webhookDelivery *WebhookDeliveryService) *LatencyAlerter
NewLatencyAlerter creates a new latency alerter
func (*LatencyAlerter) AddRule ¶
func (la *LatencyAlerter) AddRule(rule *AlertRule)
AddRule adds an alert rule
func (*LatencyAlerter) CheckLatency ¶
func (la *LatencyAlerter) CheckLatency(ctx context.Context, operation, service string, latencyMs float64, p95Ms, p99Ms float64)
CheckLatency checks latency against all applicable rules
func (*LatencyAlerter) GetAlertHistory ¶
func (la *LatencyAlerter) GetAlertHistory() map[string]*AlertHistory
GetAlertHistory returns alert history
func (*LatencyAlerter) GetAlertRules ¶
func (la *LatencyAlerter) GetAlertRules() map[string]*AlertRule
GetAlertRules returns all configured alert rules
func (*LatencyAlerter) RemoveRule ¶
func (la *LatencyAlerter) RemoveRule(ruleName string)
RemoveRule removes an alert rule
func (*LatencyAlerter) SetEnabled ¶
func (la *LatencyAlerter) SetEnabled(enabled bool)
SetEnabled enables or disables the alerter
type LatencyBucket ¶
type LatencyBucket struct {
Operation string
Service string
WindowStart time.Time
WindowEnd time.Time
Measurements []float64
Count int64
Sum float64
Min float64
Max float64
// contains filtered or unexported fields
}
LatencyBucket holds latency measurements for a specific operation/time window
type LatencyDataPoint ¶
type LatencyDataPoint struct {
Timestamp time.Time `json:"timestamp"`
Average float64 `json:"average"`
Count int64 `json:"count"`
Percentiles map[string]float64 `json:"percentiles"`
}
LatencyDataPoint represents a single data point in a trend
type LatencyMetric ¶
LatencyMetric represents an in-progress latency measurement; it is completed and recorded by calling Finish or FinishWithError
func (*LatencyMetric) Finish ¶
func (lm *LatencyMetric) Finish(emf *EMFMetrics, success bool)
Finish completes latency tracking and records the metric
func (*LatencyMetric) FinishWithError ¶
func (lm *LatencyMetric) FinishWithError(emf *EMFMetrics, errorType string)
FinishWithError completes latency tracking and records error
type LatencyStats ¶
type LatencyStats struct {
Operation string `json:"operation"`
Service string `json:"service"`
WindowStart time.Time `json:"window_start"`
WindowEnd time.Time `json:"window_end"`
Count int64 `json:"count"`
Sum float64 `json:"sum"`
Average float64 `json:"average"`
Min float64 `json:"min"`
Max float64 `json:"max"`
Percentiles map[string]float64 `json:"percentiles"`
StdDev float64 `json:"std_dev"`
}
LatencyStats represents calculated latency statistics
type LatencyTrend ¶
type LatencyTrend struct {
Operation string `json:"operation"`
Service string `json:"service"`
TimeRange string `json:"time_range"`
DataPoints []LatencyDataPoint `json:"data_points"`
TrendAnalysis TrendAnalysis `json:"trend_analysis"`
Percentiles map[string][]float64 `json:"percentiles"`
}
LatencyTrend represents latency trending over time
type MetricBuffer ¶
type MetricBuffer struct {
// contains filtered or unexported fields
}
MetricBuffer represents a buffer for metrics collection
type MetricLevel ¶
type MetricLevel string
MetricLevel defines the severity level of a metric
const (
MetricLevelCritical MetricLevel = "critical"
MetricLevelHigh MetricLevel = "high"
MetricLevelMedium MetricLevel = "medium"
MetricLevelLow MetricLevel = "low"
)
MetricLevel values
type MetricType ¶
type MetricType string
MetricType defines the type of metric
const (
MetricTypeCounter MetricType = "counter"
MetricTypeGauge MetricType = "gauge"
MetricTypeTimer MetricType = "timer"
MetricTypeHistogram MetricType = "histogram"
)
MetricType values
type MetricsCollector ¶
type MetricsCollector struct {
// contains filtered or unexported fields
}
MetricsCollector aggregates and publishes custom metrics
func NewMetricsCollector ¶
func NewMetricsCollector(client cloudWatchPutMetricDataAPI, namespace string, logger *zap.Logger) *MetricsCollector
NewMetricsCollector creates a new metrics collector
func (*MetricsCollector) Flush ¶
func (mc *MetricsCollector) Flush()
Flush manually flushes all accumulated metrics to CloudWatch. It should be called before the Lambda function returns to ensure the metrics are sent. In serverless environments, call it at the end of each request handler.
func (*MetricsCollector) RecordCost ¶
func (mc *MetricsCollector) RecordCost(operation string, costUSD float64)
RecordCost records operation cost
func (*MetricsCollector) RecordErrorRate ¶
func (mc *MetricsCollector) RecordErrorRate(operation string, errorCount, totalCount int64)
RecordErrorRate records error rate
func (*MetricsCollector) RecordLatency ¶
func (mc *MetricsCollector) RecordLatency(operation string, duration time.Duration)
RecordLatency records operation latency
func (*MetricsCollector) RecordMetric ¶
func (mc *MetricsCollector) RecordMetric(name string, value float64, unit types.StandardUnit, dimensions ...types.Dimension)
RecordMetric records a custom metric
func (*MetricsCollector) RecordPerformanceMetrics ¶
func (mc *MetricsCollector) RecordPerformanceMetrics(metrics *PerformanceMetrics)
RecordPerformanceMetrics records runtime performance metrics
func (*MetricsCollector) RecordThroughput ¶
func (mc *MetricsCollector) RecordThroughput(operation string, count int64)
RecordThroughput records operation throughput
type MetricsRecorder ¶
type MetricsRecorder interface {
RecordLatency(ctx context.Context, operation, table string, duration time.Duration, success bool, dimensions map[string]string) error
}
MetricsRecorder is the interface for recording latency metrics
type MonitoringService ¶
type MonitoringService struct {
// contains filtered or unexported fields
}
MonitoringService provides centralized monitoring and alerting configuration
func NewMonitoringService ¶
func NewMonitoringService(monitoringConfig *MonitoringServiceConfig) (*MonitoringService, error)
NewMonitoringService creates a new monitoring service with comprehensive configuration
func NewMonitoringServiceFromEnv ¶
NewMonitoringServiceFromEnv creates a monitoring service from centralized configuration
func (*MonitoringService) AddAlertRoute ¶
func (ms *MonitoringService) AddAlertRoute(route *AlertRoute)
AddAlertRoute adds a new alert route
func (*MonitoringService) CheckCost ¶
func (ms *MonitoringService) CheckCost(ctx context.Context, costMicroCents float64)
CheckCost monitors costs and sends alerts
func (*MonitoringService) CheckErrorRate ¶
func (ms *MonitoringService) CheckErrorRate(ctx context.Context, service string, errorRate float64)
CheckErrorRate monitors error rates and sends alerts
func (*MonitoringService) CheckHealth ¶
func (ms *MonitoringService) CheckHealth(ctx context.Context, service string, isHealthy bool, errorMsg string)
CheckHealth monitors service health and sends alerts
func (*MonitoringService) CheckLatency ¶
func (ms *MonitoringService) CheckLatency(ctx context.Context, service, operation string, latencyMs, p95Ms, p99Ms float64)
CheckLatency monitors latency and sends alerts
func (*MonitoringService) CheckSecurity ¶
func (ms *MonitoringService) CheckSecurity(ctx context.Context, eventType, severity string, details map[string]interface{})
CheckSecurity monitors security events and sends alerts
func (*MonitoringService) Cleanup ¶
func (ms *MonitoringService) Cleanup(ctx context.Context) error
Cleanup performs maintenance tasks
func (*MonitoringService) GetActiveAlerts ¶
func (ms *MonitoringService) GetActiveAlerts(ctx context.Context, limit int) ([]*AlertSummary, error)
GetActiveAlerts retrieves currently active alerts
func (*MonitoringService) ProcessRetries ¶
func (ms *MonitoringService) ProcessRetries(ctx context.Context) error
ProcessRetries processes failed alert deliveries
func (*MonitoringService) RemoveAlertRoute ¶
func (ms *MonitoringService) RemoveAlertRoute(routeName string)
RemoveAlertRoute removes an alert route
func (*MonitoringService) SendAlert ¶
func (ms *MonitoringService) SendAlert(ctx context.Context, alertReq *AlertRequest) error
SendAlert routes an alert based on configured rules
type MonitoringServiceConfig ¶
type MonitoringServiceConfig struct {
Logger *zap.Logger
DB core.DB
TableName string
CostService *cost.TrackingService
// AWS services
SNSClient *sns.Client
SNSTopicArn string
// Webhook configuration
WebhookURL string
WebhookHeaders map[string]string
// Service identification
Environment string
ServiceName string
Region string
// Alert routing
AlertRoutes []*AlertRoute
DefaultRoute *AlertRoute
Enabled bool
}
MonitoringServiceConfig contains configuration for the monitoring service
type PercentileMetric ¶
type PercentileMetric struct {
P50 float64 `json:"p50"`
P75 float64 `json:"p75"`
P90 float64 `json:"p90"`
P95 float64 `json:"p95"`
P99 float64 `json:"p99"`
}
PercentileMetric represents percentile-based metrics
type PerformanceMetric ¶
type PerformanceMetric struct {
Operation string
TotalTime time.Duration
CallCount int64
MaxTime time.Duration
MinTime time.Duration
LastUpdated time.Time
OverheadPercent float64
}
PerformanceMetric tracks the performance impact of observability operations
type PerformanceMetrics ¶
type PerformanceMetrics struct {
ColdStartDuration time.Duration
ExecutionDuration time.Duration
MemoryUsed int64
MemoryAllocated int64
CPUUtilization float64
GoroutineCount int
GCPauseTime time.Duration
}
PerformanceMetrics contains runtime performance data
func GetPerformanceMetrics ¶
func GetPerformanceMetrics(startTime time.Time, initTime time.Time) *PerformanceMetrics
GetPerformanceMetrics collects current runtime performance metrics
type PerformanceOptimization ¶
type PerformanceOptimization struct {
// contains filtered or unexported fields
}
PerformanceOptimization contains guidelines and utilities for minimal overhead observability
func NewPerformanceOptimization ¶
func NewPerformanceOptimization(logger *zap.Logger) *PerformanceOptimization
NewPerformanceOptimization creates a new performance monitoring instance
func (*PerformanceOptimization) GetMetrics ¶
func (po *PerformanceOptimization) GetMetrics() map[string]PerformanceMetric
GetMetrics returns current performance metrics
func (*PerformanceOptimization) GetPerformanceReport ¶
func (po *PerformanceOptimization) GetPerformanceReport() map[string]interface{}
GetPerformanceReport generates a comprehensive performance report
func (*PerformanceOptimization) LogPerformanceSummary ¶
func (po *PerformanceOptimization) LogPerformanceSummary()
LogPerformanceSummary logs a summary of observability performance impact
func (*PerformanceOptimization) TrackOperation ¶
func (po *PerformanceOptimization) TrackOperation(operation string, duration time.Duration, businessDuration time.Duration)
TrackOperation tracks the performance impact of an observability operation
func (*PerformanceOptimization) ValidatePerformanceTargets ¶
func (po *PerformanceOptimization) ValidatePerformanceTargets() []string
ValidatePerformanceTargets checks if current performance meets targets
type ScheduleConfig ¶
type ScheduleConfig struct {
BusinessHoursOnly bool `json:"business_hours_only"`
Timezone string `json:"timezone"`
BusinessHours string `json:"business_hours"` // e.g., "9:00-17:00"
BusinessDays []string `json:"business_days"` // e.g., ["mon", "tue", "wed", "thu", "fri"]
SuppressDuring []string `json:"suppress_during"` // Maintenance windows
}
ScheduleConfig controls when alerts are sent
type StandaloneAlertRepository ¶
type StandaloneAlertRepository struct {
// contains filtered or unexported fields
}
StandaloneAlertRepository provides CRUD operations for alerts using DynamORM without import cycles
func NewStandaloneAlertRepository ¶
func NewStandaloneAlertRepository(db core.DB, tableName string, logger *zap.Logger, costService dynamoCostTracker) *StandaloneAlertRepository
NewStandaloneAlertRepository creates a new standalone alert repository
func (*StandaloneAlertRepository) CleanupOldAlerts ¶
func (r *StandaloneAlertRepository) CleanupOldAlerts(ctx context.Context, olderThan time.Duration) (int, error)
CleanupOldAlerts removes alerts older than the specified duration
func (*StandaloneAlertRepository) CreateAlert ¶
CreateAlert creates a new alert
func (*StandaloneAlertRepository) GetActiveAlerts ¶
func (r *StandaloneAlertRepository) GetActiveAlerts(ctx context.Context, limit int) ([]*models.Alert, error)
GetActiveAlerts retrieves all currently active (firing) alerts
func (*StandaloneAlertRepository) GetAlertsNeedingRetry ¶
func (r *StandaloneAlertRepository) GetAlertsNeedingRetry(ctx context.Context, limit int) ([]*models.Alert, error)
GetAlertsNeedingRetry retrieves alerts that need delivery retry
func (*StandaloneAlertRepository) GetByID ¶
func (r *StandaloneAlertRepository) GetByID(ctx context.Context, alertID string) (*models.Alert, error)
GetByID retrieves an alert by its ID
func (*StandaloneAlertRepository) ResolveAlert ¶
func (r *StandaloneAlertRepository) ResolveAlert(ctx context.Context, alertID string) error
ResolveAlert marks an alert as resolved
type StandaloneDeadLetterRepository ¶
type StandaloneDeadLetterRepository struct {
// contains filtered or unexported fields
}
StandaloneDeadLetterRepository provides dead letter message operations
func NewStandaloneDeadLetterRepository ¶
func NewStandaloneDeadLetterRepository(db core.DB, tableName string, logger *zap.Logger) *StandaloneDeadLetterRepository
NewStandaloneDeadLetterRepository creates a new dead letter repository
func (*StandaloneDeadLetterRepository) Create ¶
func (r *StandaloneDeadLetterRepository) Create(ctx context.Context, message *models.DeadLetterMessage) error
Create creates a new dead letter message
func (*StandaloneDeadLetterRepository) GetByType ¶
func (r *StandaloneDeadLetterRepository) GetByType(ctx context.Context, messageType string, limit int) ([]*models.DeadLetterMessage, error)
GetByType retrieves dead letter messages by type
type StandaloneWebhookRepository ¶
type StandaloneWebhookRepository struct {
// contains filtered or unexported fields
}
StandaloneWebhookRepository provides webhook delivery operations without import cycles
func NewStandaloneWebhookRepository ¶
func NewStandaloneWebhookRepository(db core.DB, tableName string, logger *zap.Logger) *StandaloneWebhookRepository
NewStandaloneWebhookRepository creates a new standalone webhook repository
func (*StandaloneWebhookRepository) CreateDelivery ¶
func (r *StandaloneWebhookRepository) CreateDelivery(ctx context.Context, delivery *models.WebhookDelivery) error
CreateDelivery creates a new webhook delivery record
func (*StandaloneWebhookRepository) GetDeliveriesByAlert ¶
func (r *StandaloneWebhookRepository) GetDeliveriesByAlert(ctx context.Context, alertID string, limit int) ([]*models.WebhookDelivery, error)
GetDeliveriesByAlert retrieves webhook deliveries for a specific alert
func (*StandaloneWebhookRepository) GetPendingRetries ¶
func (r *StandaloneWebhookRepository) GetPendingRetries(ctx context.Context, limit int) ([]*models.WebhookDelivery, error)
GetPendingRetries retrieves webhook deliveries that need retry
func (*StandaloneWebhookRepository) UpdateDelivery ¶
func (r *StandaloneWebhookRepository) UpdateDelivery(ctx context.Context, delivery *models.WebhookDelivery) error
UpdateDelivery updates a webhook delivery record
type ThrottleConfig ¶
type ThrottleConfig struct {
MaxAlertsPerMinute int `json:"max_alerts_per_minute"`
MaxAlertsPerHour int `json:"max_alerts_per_hour"`
CooldownPeriod time.Duration `json:"cooldown_period"`
GroupBy []string `json:"group_by"` // Fields to group alerts by for throttling
}
ThrottleConfig controls alert throttling
type TraceContext ¶
type TraceContext struct {
TraceID string
SegmentID string
ParentID string
Sampled bool
RequestID string
UserID string
TenantID string
// contains filtered or unexported fields
}
TraceContext represents a distributed trace context
func (*TraceContext) GetProperty ¶
func (tc *TraceContext) GetProperty(key string) (interface{}, bool)
GetProperty gets a property from the trace context
func (*TraceContext) SetProperty ¶
func (tc *TraceContext) SetProperty(key string, value interface{})
SetProperty adds a property to the trace context
type TracingConfig ¶
type TracingConfig struct {
ServiceName string
ServiceVersion string
SamplingRate float64
DaemonAddress string
UseECS bool
LocalTesting bool
Enabled bool
}
TracingConfig contains configuration for distributed tracing
type TracingManager ¶
type TracingManager struct {
// contains filtered or unexported fields
}
TracingManager manages distributed tracing operations
func NewTracingManager ¶
func NewTracingManager(logger *zap.Logger, config *TracingConfig) *TracingManager
NewTracingManager creates a new tracing manager
func (*TracingManager) AddAnnotation ¶
func (tm *TracingManager) AddAnnotation(ctx context.Context, key string, value interface{})
AddAnnotation adds an annotation to the current segment
func (*TracingManager) AddError ¶
func (tm *TracingManager) AddError(ctx context.Context, err error, remote bool)
AddError adds an error to the current segment
func (*TracingManager) AddMetadata ¶
func (tm *TracingManager) AddMetadata(ctx context.Context, namespace string, data map[string]interface{})
AddMetadata adds metadata to the current segment
func (*TracingManager) CreateTracingMiddleware ¶
func (tm *TracingManager) CreateTracingMiddleware() func(next func(ctx context.Context) error) func(ctx context.Context) error
CreateTracingMiddleware creates middleware for automatic tracing
func (*TracingManager) ExtractTraceHeaders ¶
func (tm *TracingManager) ExtractTraceHeaders(headers map[string]string) *TraceContext
ExtractTraceHeaders extracts trace information from headers
func (*TracingManager) GetTraceContext ¶
func (tm *TracingManager) GetTraceContext(ctx context.Context) *TraceContext
GetTraceContext extracts trace context from the current segment
func (*TracingManager) InjectTraceHeaders ¶
func (tm *TracingManager) InjectTraceHeaders(ctx context.Context, headers map[string]string)
InjectTraceHeaders injects trace headers into a map for propagation
func (*TracingManager) IsEnabled ¶
func (tm *TracingManager) IsEnabled() bool
IsEnabled returns whether tracing is enabled
func (*TracingManager) SetHTTPRequest ¶
func (tm *TracingManager) SetHTTPRequest(ctx context.Context, method, url string, userAgent string, clientIP string)
SetHTTPRequest adds HTTP request information to the segment
func (*TracingManager) SetHTTPResponse ¶
func (tm *TracingManager) SetHTTPResponse(ctx context.Context, statusCode int, contentLength int64)
SetHTTPResponse adds HTTP response information to the segment
func (*TracingManager) SetUser ¶
func (tm *TracingManager) SetUser(ctx context.Context, userID string)
SetUser adds user information to the segment
func (*TracingManager) StartSegment ¶
func (tm *TracingManager) StartSegment(ctx context.Context, name string) (context.Context, *xray.Segment)
StartSegment starts a new X-Ray segment
func (*TracingManager) StartSubsegment ¶
func (tm *TracingManager) StartSubsegment(ctx context.Context, name string) (context.Context, *xray.Segment)
StartSubsegment starts a new X-Ray subsegment
func (*TracingManager) TraceDatabase ¶
func (tm *TracingManager) TraceDatabase(ctx context.Context, operation string, tableName string, queryFunc func(ctx context.Context) error) error
TraceDatabase traces a database operation
func (*TracingManager) TraceExternalCall ¶
func (tm *TracingManager) TraceExternalCall(ctx context.Context, serviceName string, method string, url string, callFunc func(ctx context.Context) error) error
TraceExternalCall traces an external HTTP call
func (*TracingManager) TraceLambdaFunction ¶
func (tm *TracingManager) TraceLambdaFunction(ctx context.Context, functionName string, execFunc func(ctx context.Context) error) error
TraceLambdaFunction traces a Lambda function execution
type TrendAnalysis ¶
type TrendAnalysis struct {
Slope float64 `json:"slope"` // Trend direction (positive = increasing)
RSquared float64 `json:"r_squared"` // Trend strength (0-1)
TrendDirection string `json:"trend_direction"` // "increasing", "decreasing", "stable"
PercentChange float64 `json:"percent_change"` // Change from first to last data point
Volatility float64 `json:"volatility"` // Standard deviation of changes
IsSignificant bool `json:"is_significant"` // Whether trend is statistically significant
ChangeClassification string `json:"change_classification"` // "significant_improvement", "significant_degradation", "stable"
}
TrendAnalysis provides statistical analysis of latency trends
type VerticalAnnotation ¶
type VerticalAnnotation struct {
Value string `json:"value"`
Label string `json:"label,omitempty"`
Color string `json:"color,omitempty"`
Visible bool `json:"visible,omitempty"`
}
VerticalAnnotation represents a vertical line annotation
type WebhookConfig ¶
type WebhookConfig struct {
ID string `json:"id"`
URL string `json:"url"`
Headers map[string]string `json:"headers"`
Timeout time.Duration `json:"timeout"`
MaxAttempts int `json:"max_attempts"`
RetryInterval time.Duration `json:"retry_interval"`
SecretToken string `json:"secret_token,omitempty"`
VerifySSL bool `json:"verify_ssl"`
Enabled bool `json:"enabled"`
AlertTypes []string `json:"alert_types,omitempty"` // Filter by alert types
SeverityLevels []string `json:"severity_levels,omitempty"` // Filter by severity
Services []string `json:"services,omitempty"` // Filter by services
}
WebhookConfig contains webhook endpoint configuration
type WebhookDeliveryConfig ¶
type WebhookDeliveryConfig struct {
Logger *zap.Logger
WebhookRepository *StandaloneWebhookRepository
AlertRepository *StandaloneAlertRepository
DeadLetterRepository *StandaloneDeadLetterRepository
HTTPTimeout time.Duration
MaxAttempts int
RetryInterval time.Duration
Enabled bool
}
WebhookDeliveryConfig contains configuration for webhook delivery service
type WebhookDeliveryService ¶
type WebhookDeliveryService struct {
// contains filtered or unexported fields
}
WebhookDeliveryService handles webhook delivery with retry logic and dead letter handling
func NewWebhookDeliveryService ¶
func NewWebhookDeliveryService(config *WebhookDeliveryConfig) *WebhookDeliveryService
NewWebhookDeliveryService creates a new webhook delivery service
func (*WebhookDeliveryService) DeliverAlert ¶
DeliverAlert delivers an alert to all configured webhooks
func (*WebhookDeliveryService) RetryFailedDeliveries ¶
func (w *WebhookDeliveryService) RetryFailedDeliveries(ctx context.Context) error
RetryFailedDeliveries processes failed deliveries that are ready for retry
type XRayConfig ¶
XRayConfig contains configuration for X-Ray Lambda middleware
func NewXRayConfig ¶
func NewXRayConfig(serviceName, serviceVersion string) *XRayConfig
NewXRayConfig creates X-Ray configuration with defaults
type YAxisConfig ¶
type YAxisConfig struct {
Left *AxisConfig `json:"left,omitempty"`
Right *AxisConfig `json:"right,omitempty"`
}
YAxisConfig configures the Y-axis of a widget
Source Files ¶
- alert_repository_standalone.go
- alerting.go
- constants.go
- dashboards.go
- dynamorm_tracker.go
- emf.go
- emf_integration_example.go
- emf_metrics.go
- enhanced_metrics.go
- errors.go
- health.go
- http_tracker.go
- latency_aggregator.go
- latency_alerting.go
- metrics.go
- monitoring_service.go
- performance.go
- tracing.go
- webhook_delivery.go
- xray_middleware.go