Documentation
¶
Index ¶
- type ActiveAlert
- type AlertManager
- type AlertSummary
- type AlertThresholds
- type AvailabilityCalculation
- type AvailabilityCalculator
- func (ac *AvailabilityCalculator) GetCalculation(entityID, entityType string, window TimeWindow) (*AvailabilityCalculation, bool)
- func (ac *AvailabilityCalculator) GetCalculationHistory(entityID, entityType string, window TimeWindow, since time.Time, ...) []AvailabilityCalculation
- func (ac *AvailabilityCalculator) GetCalculationsForEntity(entityID, entityType string) map[TimeWindow]*AvailabilityCalculation
- func (ac *AvailabilityCalculator) GetSLACompliance(entityID, entityType string, window TimeWindow) (*SLAComplianceStatus, error)
- func (ac *AvailabilityCalculator) Start() error
- func (ac *AvailabilityCalculator) Stop() error
- type AvailabilityCalculatorConfig
- type AvailabilityDimension
- type AvailabilityMetric
- type AvailabilityReport
- type AvailabilityReporter
- func (ar *AvailabilityReporter) CreateDashboard(dashboardID, name string, reportType ReportType, refreshInterval time.Duration) (*Dashboard, error)
- func (ar *AvailabilityReporter) ExportReport(ctx context.Context, reportID string, format ReportFormat) ([]byte, error)
- func (ar *AvailabilityReporter) GenerateReport(ctx context.Context, reportType ReportType, timeWindow TimeWindow, ...) (*AvailabilityReport, error)
- func (ar *AvailabilityReporter) GetDashboard(dashboardID string) (*Dashboard, error)
- func (ar *AvailabilityReporter) GetReport(reportID string) (*AvailabilityReport, error)
- func (ar *AvailabilityReporter) GetReports() []*AvailabilityReport
- func (ar *AvailabilityReporter) Start() error
- func (ar *AvailabilityReporter) Stop() error
- func (ar *AvailabilityReporter) SubscribeToDashboard(dashboardID string) (<-chan *Dashboard, error)
- type AvailabilityState
- type AvailabilityStatus
- type AvailabilitySummary
- type BudgetAlertThresholds
- type BusinessHours
- type BusinessHoursConfig
- type BusinessImpact
- type CapacityForecast
- type CascadeFailureAnalysis
- type ChaosCheckExecutor
- type CheckConfig
- type CheckExecutor
- func NewChaosCheckExecutor() CheckExecutor
- func NewDatabaseCheckExecutor() CheckExecutor
- func NewExternalServiceExecutor(client *http.Client) CheckExecutor
- func NewHTTPCheckExecutor(client *http.Client) CheckExecutor
- func NewIntentFlowExecutor(client *http.Client, endpoint, token string) CheckExecutor
- type CircuitBreakerConfig
- type CircuitBreakerState
- type CompliancePoint
- type ComplianceStatus
- type ComponentAvailability
- type ComponentConfig
- type ComponentHealthCollector
- type ComponentStatus
- type Dashboard
- type DashboardPanel
- type DatabaseCheckExecutor
- type Dependency
- type DependencyAvailability
- type DependencyChain
- type DependencyChainTracker
- func (dct *DependencyChainTracker) AddChain(chain *DependencyChain) error
- func (dct *DependencyChainTracker) AnalyzeImpact(serviceName string) ([]string, error)
- func (dct *DependencyChainTracker) GetAllChains() []*DependencyChain
- func (dct *DependencyChainTracker) GetChainStatus(chainID string) (string, error)
- func (dct *DependencyChainTracker) GetCriticalPath() ([]string, error)
- func (dct *DependencyChainTracker) GetServiceStatus(serviceName string) (*DependencyServiceStatus, error)
- func (dct *DependencyChainTracker) RemoveChain(chainID string) error
- func (dct *DependencyChainTracker) Start() error
- func (dct *DependencyChainTracker) Stop() error
- func (dct *DependencyChainTracker) UpdateStatus(serviceName, status string, responseTime time.Duration, failureReason string) error
- type DependencyGraph
- type DependencyHealth
- type DependencyHealthCheck
- type DependencyServiceStatus
- type DependencyStatus
- type DependencyTracker
- func (dt *DependencyTracker) AddDependency(dep *Dependency) error
- func (dt *DependencyTracker) GetAllDependencyHealth() map[string]*DependencyHealth
- func (dt *DependencyTracker) GetAvailabilityMetrics(dependencyID string) (*AvailabilityMetric, error)
- func (dt *DependencyTracker) GetCascadeAnalysis(since time.Time) []CascadeFailureAnalysis
- func (dt *DependencyTracker) GetCircuitBreakerStates() map[string]*CircuitBreakerState
- func (dt *DependencyTracker) GetDependencyHealth(dependencyID string) (*DependencyHealth, bool)
- func (dt *DependencyTracker) Start() error
- func (dt *DependencyTracker) Stop() error
- type DependencyTrackerConfig
- type DependencyType
- type ErrorBudget
- type ErrorBudgetStatus
- type ExternalServiceExecutor
- type FailureMode
- type FailureProbability
- type HTTPCheckExecutor
- type HealthCheckResult
- type Incident
- type IncidentSummary
- type IntentFlowExecutor
- type IntentFlowStep
- type LiveDashboardUpdater
- type MaintenanceWindow
- type MetricCollector
- type MultiDimensionalTracker
- func (t *MultiDimensionalTracker) GetCurrentState() *AvailabilityState
- func (t *MultiDimensionalTracker) GetMetricsByBusinessImpact(impact BusinessImpact) []*AvailabilityMetric
- func (t *MultiDimensionalTracker) GetMetricsByDimension(dimension AvailabilityDimension) []*AvailabilityMetric
- func (t *MultiDimensionalTracker) GetMetricsHistory(since, until time.Time) []AvailabilityMetric
- func (t *MultiDimensionalTracker) Start() error
- func (t *MultiDimensionalTracker) Stop() error
- type PanelPosition
- type PredictiveInsights
- type ReportFormat
- type ReportType
- type ReporterConfig
- type RiskFactor
- type SLAComplianceReport
- type SLAComplianceStatus
- type SLARequirements
- type SLATarget
- type SLATargetConfig
- type SeasonalPattern
- type ServiceAvailability
- type ServiceDependency
- type ServiceEndpointConfig
- type ServiceLayer
- type ServiceLayerCollector
- type StepResult
- type SyntheticCheck
- type SyntheticCheckStatus
- type SyntheticCheckType
- type SyntheticMonitor
- func (sm *SyntheticMonitor) AddCheck(check *SyntheticCheck) error
- func (sm *SyntheticMonitor) GetAvailabilityMetrics(checkID string, since, until time.Time) (*AvailabilityMetric, error)
- func (sm *SyntheticMonitor) GetCheck(checkID string) (*SyntheticCheck, bool)
- func (sm *SyntheticMonitor) GetResults(since, until time.Time) []SyntheticResult
- func (sm *SyntheticMonitor) GetResultsByCheck(checkID string, since, until time.Time) []SyntheticResult
- func (sm *SyntheticMonitor) ListChecks() []*SyntheticCheck
- func (sm *SyntheticMonitor) RemoveCheck(checkID string) error
- func (sm *SyntheticMonitor) Start() error
- func (sm *SyntheticMonitor) Stop() error
- type SyntheticMonitorConfig
- type SyntheticResult
- type TimeWindow
- type TrackerConfig
- type TrendAnalysis
- type UserJourneyAvailability
- type UserJourneyCollector
- type UserJourneyConfig
- type UserJourneyStep
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type ActiveAlert ¶
type ActiveAlert struct {
ID string `json:"id"`
Rule string `json:"rule"`
Severity string `json:"severity"`
Status string `json:"status"`
StartTime time.Time `json:"start_time"`
Duration time.Duration `json:"duration"`
Service string `json:"service"`
Component string `json:"component"`
Description string `json:"description"`
Value float64 `json:"value"`
Threshold float64 `json:"threshold"`
Runbook string `json:"runbook"`
}
type AlertManager ¶
type AlertManager interface {
SendAlert(ctx context.Context, check *SyntheticCheck, result *SyntheticResult) error
EvaluateThresholds(ctx context.Context, check *SyntheticCheck, results []SyntheticResult) (bool, error)
}
type AlertSummary ¶
type AlertSummary struct {
ID string `json:"id"`
AlertName string `json:"alert_name"`
Severity string `json:"severity"`
Timestamp time.Time `json:"timestamp"`
ResolvedAt *time.Time `json:"resolved_at,omitempty"`
Duration time.Duration `json:"duration"`
Service string `json:"service"`
Component string `json:"component"`
Description string `json:"description"`
Impact BusinessImpact `json:"impact"`
}
type AlertThresholds ¶
type AlertThresholds struct {
AvailabilityWarning float64 `json:"availability_warning"` // 0-1
AvailabilityCritical float64 `json:"availability_critical"` // 0-1
ErrorBudgetWarning float64 `json:"error_budget_warning"` // 0-1 (utilization)
ErrorBudgetCritical float64 `json:"error_budget_critical"` // 0-1 (utilization)
ResponseTimeWarning time.Duration `json:"response_time_warning"`
ResponseTimeCritical time.Duration `json:"response_time_critical"`
ErrorRateWarning float64 `json:"error_rate_warning"` // 0-1
ErrorRateCritical float64 `json:"error_rate_critical"` // 0-1
ResponseTime float64 `json:"response_time"` // milliseconds
ErrorRate float64 `json:"error_rate"` // percentage
}
type AvailabilityCalculation ¶
type AvailabilityCalculation struct {
EntityID string `json:"entity_id"`
EntityType string `json:"entity_type"`
Dimension AvailabilityDimension `json:"dimension"`
TimeWindow TimeWindow `json:"time_window"`
StartTime time.Time `json:"start_time"`
EndTime time.Time `json:"end_time"`
Availability float64 `json:"availability"` // 0.0 to 1.0
Uptime time.Duration `json:"uptime"`
Downtime time.Duration `json:"downtime"`
TotalTime time.Duration `json:"total_time"`
MeanResponseTime time.Duration `json:"mean_response_time"`
P50ResponseTime time.Duration `json:"p50_response_time"`
P95ResponseTime time.Duration `json:"p95_response_time"`
P99ResponseTime time.Duration `json:"p99_response_time"`
ErrorRate float64 `json:"error_rate"` // 0.0 to 1.0
TotalRequests int64 `json:"total_requests"`
SuccessfulRequests int64 `json:"successful_requests"`
FailedRequests int64 `json:"failed_requests"`
QualityScore float64 `json:"quality_score"` // Weighted score 0.0 to 1.0
WeightedAvailability float64 `json:"weighted_availability"` // Business impact weighted
ErrorBudget *ErrorBudget `json:"error_budget,omitempty"`
IncidentCount int `json:"incident_count"`
MTTR time.Duration `json:"mttr"` // Mean Time To Recovery
MTBF time.Duration `json:"mtbf"` // Mean Time Between Failures
Metadata json.RawMessage `json:"metadata"`
}
type AvailabilityCalculator ¶
type AvailabilityCalculator struct {
// contains filtered or unexported fields
}
func NewAvailabilityCalculator ¶
func NewAvailabilityCalculator(config *AvailabilityCalculatorConfig, tracker *MultiDimensionalTracker) (*AvailabilityCalculator, error)
func (*AvailabilityCalculator) GetCalculation ¶
func (ac *AvailabilityCalculator) GetCalculation(entityID, entityType string, window TimeWindow) (*AvailabilityCalculation, bool)
func (*AvailabilityCalculator) GetCalculationHistory ¶
func (ac *AvailabilityCalculator) GetCalculationHistory(entityID, entityType string, window TimeWindow, since time.Time, until time.Time) []AvailabilityCalculation
func (*AvailabilityCalculator) GetCalculationsForEntity ¶
func (ac *AvailabilityCalculator) GetCalculationsForEntity(entityID, entityType string) map[TimeWindow]*AvailabilityCalculation
func (*AvailabilityCalculator) GetSLACompliance ¶
func (ac *AvailabilityCalculator) GetSLACompliance(entityID, entityType string, window TimeWindow) (*SLAComplianceStatus, error)
func (*AvailabilityCalculator) Start ¶
func (ac *AvailabilityCalculator) Start() error
func (*AvailabilityCalculator) Stop ¶
func (ac *AvailabilityCalculator) Stop() error
type AvailabilityCalculatorConfig ¶
type AvailabilityCalculatorConfig struct {
DefaultSLATarget SLATarget `json:"default_sla_target"`
BusinessHours BusinessHoursConfig `json:"business_hours"`
MaintenanceWindows []MaintenanceWindow `json:"maintenance_windows"`
CalculationInterval time.Duration `json:"calculation_interval"`
RetentionPeriod time.Duration `json:"retention_period"`
EnabledWindows []TimeWindow `json:"enabled_windows"`
MaxDataPoints int `json:"max_data_points"` // Max data points per calculation
SamplingInterval time.Duration `json:"sampling_interval"` // How often to sample data
AvailabilityWeight float64 `json:"availability_weight"` // Weight for availability in quality score
PerformanceWeight float64 `json:"performance_weight"` // Weight for performance in quality score
ErrorRateWeight float64 `json:"error_rate_weight"` // Weight for error rate in quality score
EnableBusinessWeighting bool `json:"enable_business_weighting"`
BusinessImpactWeights map[BusinessImpact]float64 `json:"business_impact_weights"`
ErrorBudgetAlertThresholds BudgetAlertThresholds `json:"error_budget_alert_thresholds"`
}
type AvailabilityDimension ¶
type AvailabilityDimension string
const (
DimensionService AvailabilityDimension = "service"
DimensionComponent AvailabilityDimension = "component"
DimensionUserJourney AvailabilityDimension = "user_journey"
DimensionBusiness AvailabilityDimension = "business"
)
type AvailabilityMetric ¶
type AvailabilityMetric struct {
Timestamp time.Time `json:"timestamp"`
Dimension AvailabilityDimension `json:"dimension"`
EntityID string `json:"entity_id"`
EntityType string `json:"entity_type"`
Status AvailabilityStatus `json:"status"`
ResponseTime time.Duration `json:"response_time"`
ErrorRate float64 `json:"error_rate"`
BusinessImpact BusinessImpact `json:"business_impact"`
Layer ServiceLayer `json:"layer"`
Metadata json.RawMessage `json:"metadata"`
}
type AvailabilityReport ¶
type AvailabilityReport struct {
ID string `json:"id"`
Type ReportType `json:"type"`
Format ReportFormat `json:"format"`
GeneratedAt time.Time `json:"generated_at"`
TimeWindow TimeWindow `json:"time_window"`
StartTime time.Time `json:"start_time"`
EndTime time.Time `json:"end_time"`
Summary *AvailabilitySummary `json:"summary"`
ServiceMetrics []ServiceAvailability `json:"service_metrics,omitempty"`
ComponentMetrics []ComponentAvailability `json:"component_metrics,omitempty"`
DependencyMetrics []DependencyAvailability `json:"dependency_metrics,omitempty"`
UserJourneyMetrics []UserJourneyAvailability `json:"user_journey_metrics,omitempty"`
SLACompliance *SLAComplianceReport `json:"sla_compliance,omitempty"`
ErrorBudgets []ErrorBudgetStatus `json:"error_budgets,omitempty"`
Incidents []IncidentSummary `json:"incidents,omitempty"`
AlertHistory []AlertSummary `json:"alert_history,omitempty"`
TrendAnalysis *TrendAnalysis `json:"trend_analysis,omitempty"`
PredictiveInsights *PredictiveInsights `json:"predictive_insights,omitempty"`
Metadata json.RawMessage `json:"metadata,omitempty"`
}
type AvailabilityReporter ¶
type AvailabilityReporter struct {
// contains filtered or unexported fields
}
func NewAvailabilityReporter ¶
func NewAvailabilityReporter(config *ReporterConfig, tracker *MultiDimensionalTracker, calculator *AvailabilityCalculator, dependencyTracker *DependencyChainTracker, syntheticMonitor *SyntheticMonitor, promClient api.Client) (*AvailabilityReporter, error)
func (*AvailabilityReporter) CreateDashboard ¶
func (ar *AvailabilityReporter) CreateDashboard(dashboardID, name string, reportType ReportType, refreshInterval time.Duration) (*Dashboard, error)
func (*AvailabilityReporter) ExportReport ¶
func (ar *AvailabilityReporter) ExportReport(ctx context.Context, reportID string, format ReportFormat) ([]byte, error)
func (*AvailabilityReporter) GenerateReport ¶
func (ar *AvailabilityReporter) GenerateReport(ctx context.Context, reportType ReportType, timeWindow TimeWindow, format ReportFormat) (*AvailabilityReport, error)
func (*AvailabilityReporter) GetDashboard ¶
func (ar *AvailabilityReporter) GetDashboard(dashboardID string) (*Dashboard, error)
func (*AvailabilityReporter) GetReport ¶
func (ar *AvailabilityReporter) GetReport(reportID string) (*AvailabilityReport, error)
func (*AvailabilityReporter) GetReports ¶
func (ar *AvailabilityReporter) GetReports() []*AvailabilityReport
func (*AvailabilityReporter) Start ¶
func (ar *AvailabilityReporter) Start() error
func (*AvailabilityReporter) Stop ¶
func (ar *AvailabilityReporter) Stop() error
func (*AvailabilityReporter) SubscribeToDashboard ¶
func (ar *AvailabilityReporter) SubscribeToDashboard(dashboardID string) (<-chan *Dashboard, error)
type AvailabilityState ¶
type AvailabilityState struct {
CurrentMetrics map[string]*AvailabilityMetric `json:"current_metrics"`
AggregatedStatus AvailabilityStatus `json:"aggregated_status"`
LastUpdate time.Time `json:"last_update"`
BusinessImpactScore float64 `json:"business_impact_score"`
}
type AvailabilityStatus ¶
type AvailabilityStatus string
const (
HealthHealthy AvailabilityStatus = "healthy"
HealthDegraded AvailabilityStatus = "degraded"
HealthUnhealthy AvailabilityStatus = "unhealthy"
HealthUnknown AvailabilityStatus = "unknown"
)
type AvailabilitySummary ¶
type AvailabilitySummary struct {
OverallAvailability float64 `json:"overall_availability"` // 0.0 to 1.0
WeightedAvailability float64 `json:"weighted_availability"` // Business impact weighted
TargetAvailability float64 `json:"target_availability"` // SLA target
ComplianceStatus ComplianceStatus `json:"compliance_status"`
TotalDowntime time.Duration `json:"total_downtime"`
MeanTimeToRecovery time.Duration `json:"mean_time_to_recovery"`
MeanTimeBetweenFailures time.Duration `json:"mean_time_between_failures"`
IncidentCount int `json:"incident_count"`
CriticalIncidentCount int `json:"critical_incident_count"`
BusinessImpactScore float64 `json:"business_impact_score"` // 0-100
AffectedUserCount int64 `json:"affected_user_count"`
EstimatedRevenueLoss float64 `json:"estimated_revenue_loss"`
AverageResponseTime time.Duration `json:"average_response_time"`
P95ResponseTime time.Duration `json:"p95_response_time"`
P99ResponseTime time.Duration `json:"p99_response_time"`
ErrorRate float64 `json:"error_rate"` // 0.0 to 1.0
HealthyPercentage float64 `json:"healthy_percentage"`
DegradedPercentage float64 `json:"degraded_percentage"`
UnhealthyPercentage float64 `json:"unhealthy_percentage"`
}
type BudgetAlertThresholds ¶
type BusinessHours ¶
type BusinessHoursConfig ¶
type BusinessHoursConfig struct {
Enabled bool `json:"enabled"`
StartHour int `json:"start_hour"` // 0-23
EndHour int `json:"end_hour"` // 0-23
WeekDays []time.Weekday `json:"weekdays"`
Timezone string `json:"timezone"`
Weight float64 `json:"weight"` // Weight multiplier for business hours
NonBusinessWeight float64 `json:"non_business_weight"` // Weight for non-business hours
}
type BusinessImpact ¶
type BusinessImpact int
const (
ImpactCritical BusinessImpact = 5 // Complete service unavailable
ImpactHigh BusinessImpact = 4 // Major functionality affected
ImpactMedium BusinessImpact = 3 // Some functionality affected
ImpactLow BusinessImpact = 2 // Minor functionality affected
ImpactMinimal BusinessImpact = 1 // No user-facing impact
)
type CapacityForecast ¶
type CapacityForecast struct {
TimeHorizon time.Duration `json:"time_horizon"`
ProjectedLoad float64 `json:"projected_load"`
CurrentCapacity float64 `json:"current_capacity"`
RequiredCapacity float64 `json:"required_capacity"`
CapacityGap float64 `json:"capacity_gap"`
ScalingRecommendation string `json:"scaling_recommendation"`
}
type CascadeFailureAnalysis ¶
type CascadeFailureAnalysis struct {
FailedDependency string `json:"failed_dependency"`
ImpactedServices []string `json:"impacted_services"`
BusinessImpact float64 `json:"business_impact"`
CascadeDepth int `json:"cascade_depth"`
RecoveryPath []string `json:"recovery_path"`
EstimatedRecoveryTime time.Duration `json:"estimated_recovery_time"`
Timestamp time.Time `json:"timestamp"`
}
type ChaosCheckExecutor ¶
type ChaosCheckExecutor struct{}
func (*ChaosCheckExecutor) Execute ¶
func (e *ChaosCheckExecutor) Execute(ctx context.Context, check *SyntheticCheck) (*SyntheticResult, error)
func (*ChaosCheckExecutor) Type ¶
func (e *ChaosCheckExecutor) Type() SyntheticCheckType
type CheckConfig ¶
type CheckConfig struct {
URL string `json:"url,omitempty"`
Method string `json:"method,omitempty"`
Headers map[string]string `json:"headers,omitempty"`
Body string `json:"body,omitempty"`
ExpectedStatus int `json:"expected_status,omitempty"`
ExpectedBody string `json:"expected_body,omitempty"`
FollowRedirects bool `json:"follow_redirects,omitempty"`
SkipTLS bool `json:"skip_tls,omitempty"`
IntentPayload json.RawMessage `json:"intent_payload,omitempty"`
ExpectedResponse json.RawMessage `json:"expected_response,omitempty"`
FlowSteps []IntentFlowStep `json:"flow_steps,omitempty"`
ConnectionString string `json:"connection_string,omitempty"`
Query string `json:"query,omitempty"`
ExpectedRows int `json:"expected_rows,omitempty"`
ServiceName string `json:"service_name,omitempty"`
ServiceEndpoint string `json:"service_endpoint,omitempty"`
ChaosType string `json:"chaos_type,omitempty"`
ChaosDuration time.Duration `json:"chaos_duration,omitempty"`
ChaosIntensity float64 `json:"chaos_intensity,omitempty"`
}
type CheckExecutor ¶
type CheckExecutor interface {
Execute(ctx context.Context, check *SyntheticCheck) (*SyntheticResult, error)
Type() SyntheticCheckType
}
func NewChaosCheckExecutor ¶
func NewChaosCheckExecutor() CheckExecutor
func NewDatabaseCheckExecutor ¶
func NewDatabaseCheckExecutor() CheckExecutor
func NewExternalServiceExecutor ¶
func NewExternalServiceExecutor(client *http.Client) CheckExecutor
func NewHTTPCheckExecutor ¶
func NewHTTPCheckExecutor(client *http.Client) CheckExecutor
func NewIntentFlowExecutor ¶
func NewIntentFlowExecutor(client *http.Client, endpoint, token string) CheckExecutor
type CircuitBreakerConfig ¶
type CircuitBreakerConfig struct {
Enabled bool `json:"enabled"`
FailureThreshold int `json:"failure_threshold"`
RecoveryTimeout time.Duration `json:"recovery_timeout"`
HalfOpenMaxCalls int `json:"half_open_max_calls"`
MinRequestsThreshold int `json:"min_requests_threshold"`
ConsecutiveSuccesses int `json:"consecutive_successes"`
}
type CircuitBreakerState ¶
type CircuitBreakerState struct {
DependencyID string `json:"dependency_id"`
State string `json:"state"` // closed, open, half_open
FailureCount int `json:"failure_count"`
LastFailureTime time.Time `json:"last_failure_time"`
NextRetryTime time.Time `json:"next_retry_time"`
HalfOpenCalls int `json:"half_open_calls"`
ConsecutiveSuccesses int `json:"consecutive_successes"`
}
type CompliancePoint ¶
type ComplianceStatus ¶
type ComplianceStatus string
const (
ComplianceHealthy ComplianceStatus = "healthy" // Within SLA targets
ComplianceWarning ComplianceStatus = "warning" // Approaching SLA breach
ComplianceCritical ComplianceStatus = "critical" // SLA breached
ComplianceUnknown ComplianceStatus = "unknown" // Insufficient data
)
type ComponentAvailability ¶
type ComponentAvailability struct {
ComponentName string `json:"component_name"`
ComponentType string `json:"component_type"`
Namespace string `json:"namespace"`
BusinessImpact BusinessImpact `json:"business_impact"`
Availability float64 `json:"availability"`
HealthStatus monitoring.HealthStatus `json:"health_status"`
RestartCount int `json:"restart_count"`
ReadinessFailures int `json:"readiness_failures"`
LivenessFailures int `json:"liveness_failures"`
ResourceUtilization map[string]float64 `json:"resource_utilization"`
LastRestart *time.Time `json:"last_restart,omitempty"`
}
type ComponentConfig ¶
type ComponentConfig struct {
Name string `json:"name"`
Namespace string `json:"namespace"`
Selector map[string]string `json:"selector"`
ResourceType string `json:"resource_type"` // pod, deployment, service
BusinessImpact BusinessImpact `json:"business_impact"`
Layer ServiceLayer `json:"layer"`
}
type ComponentHealthCollector ¶
type ComponentHealthCollector struct {
// contains filtered or unexported fields
}
func NewComponentHealthCollector ¶
func NewComponentHealthCollector(components []ComponentConfig, kubeClient client.Client, kubeClientset kubernetes.Interface) (*ComponentHealthCollector, error)
func (*ComponentHealthCollector) Collect ¶
func (chc *ComponentHealthCollector) Collect(ctx context.Context) ([]*AvailabilityMetric, error)
func (*ComponentHealthCollector) Dimension ¶
func (chc *ComponentHealthCollector) Dimension() AvailabilityDimension
func (*ComponentHealthCollector) Name ¶
func (chc *ComponentHealthCollector) Name() string
type ComponentStatus ¶
type ComponentStatus struct {
Status AvailabilityStatus
Metadata map[string]interface{}
}
type Dashboard ¶
type Dashboard struct {
ID string `json:"id"`
Name string `json:"name"`
Type ReportType `json:"type"`
LastUpdated time.Time `json:"last_updated"`
RefreshInterval time.Duration `json:"refresh_interval"`
Data *AvailabilityReport `json:"data"`
Panels []DashboardPanel `json:"panels"`
Alerts []ActiveAlert `json:"alerts"`
}
type DashboardPanel ¶
type DashboardPanel struct {
ID string `json:"id"`
Title string `json:"title"`
Type string `json:"type"` // metric, chart, table, alert
Position PanelPosition `json:"position"`
Data interface{} `json:"data"`
Config json.RawMessage `json:"config"`
}
type DatabaseCheckExecutor ¶
type DatabaseCheckExecutor struct{}
func (*DatabaseCheckExecutor) Execute ¶
func (e *DatabaseCheckExecutor) Execute(ctx context.Context, check *SyntheticCheck) (*SyntheticResult, error)
func (*DatabaseCheckExecutor) Type ¶
func (e *DatabaseCheckExecutor) Type() SyntheticCheckType
type Dependency ¶
type Dependency struct {
ID string `json:"id"`
Name string `json:"name"`
Type DependencyType `json:"type"`
ServiceName string `json:"service_name"`
Namespace string `json:"namespace"`
Endpoint string `json:"endpoint"`
BusinessImpact BusinessImpact `json:"business_impact"`
FailureMode FailureMode `json:"failure_mode"`
CircuitBreaker CircuitBreakerConfig `json:"circuit_breaker"`
Dependencies []string `json:"dependencies"` // IDs of dependencies this depends on
Dependents []string `json:"dependents"` // IDs of services that depend on this
HealthChecks []DependencyHealthCheck `json:"health_checks"`
SLARequirements SLARequirements `json:"sla_requirements"`
Tags map[string]string `json:"tags"`
}
type DependencyAvailability ¶
type DependencyAvailability struct {
DependencyName string `json:"dependency_name"`
DependencyType DependencyType `json:"dependency_type"`
BusinessImpact BusinessImpact `json:"business_impact"`
Availability float64 `json:"availability"`
HealthStatus DependencyStatus `json:"health_status"`
ResponseTime time.Duration `json:"response_time"`
ErrorRate float64 `json:"error_rate"`
CircuitBreakerState string `json:"circuit_breaker_state"`
FailureCount int `json:"failure_count"`
LastFailure *time.Time `json:"last_failure,omitempty"`
RecoveryTime time.Duration `json:"recovery_time"`
}
type DependencyChain ¶
type DependencyChain struct {
ID string `json:"id"`
Name string `json:"name"`
Services []ServiceDependency `json:"services"`
CriticalPath bool `json:"critical_path"`
Metadata json.RawMessage `json:"metadata,omitempty"`
}
type DependencyChainTracker ¶
type DependencyChainTracker struct {
// contains filtered or unexported fields
}
func NewDependencyChainTracker ¶
func NewDependencyChainTracker() *DependencyChainTracker
func (*DependencyChainTracker) AddChain ¶
func (dct *DependencyChainTracker) AddChain(chain *DependencyChain) error
func (*DependencyChainTracker) AnalyzeImpact ¶
func (dct *DependencyChainTracker) AnalyzeImpact(serviceName string) ([]string, error)
func (*DependencyChainTracker) GetAllChains ¶
func (dct *DependencyChainTracker) GetAllChains() []*DependencyChain
func (*DependencyChainTracker) GetChainStatus ¶
func (dct *DependencyChainTracker) GetChainStatus(chainID string) (string, error)
func (*DependencyChainTracker) GetCriticalPath ¶
func (dct *DependencyChainTracker) GetCriticalPath() ([]string, error)
func (*DependencyChainTracker) GetServiceStatus ¶
func (dct *DependencyChainTracker) GetServiceStatus(serviceName string) (*DependencyServiceStatus, error)
func (*DependencyChainTracker) RemoveChain ¶
func (dct *DependencyChainTracker) RemoveChain(chainID string) error
func (*DependencyChainTracker) Start ¶
func (dct *DependencyChainTracker) Start() error
func (*DependencyChainTracker) Stop ¶
func (dct *DependencyChainTracker) Stop() error
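func (*DependencyChainTracker) UpdateStatus ¶
func (dct *DependencyChainTracker) UpdateStatus(serviceName, status string, responseTime time.Duration, failureReason string) error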
type DependencyGraph ¶
type DependencyHealth ¶
type DependencyHealth struct {
DependencyID string `json:"dependency_id"`
Status DependencyStatus `json:"status"`
LastUpdate time.Time `json:"last_update"`
ResponseTime time.Duration `json:"response_time"`
ErrorRate float64 `json:"error_rate"`
Availability float64 `json:"availability"`
CircuitBreakerState string `json:"circuit_breaker_state"`
FailureCount int `json:"failure_count"`
LastFailure time.Time `json:"last_failure"`
RecoveryTime time.Duration `json:"recovery_time"`
HealthCheckResults []HealthCheckResult `json:"health_check_results"`
}
type DependencyHealthCheck ¶
type DependencyHealthCheck struct {
Type string `json:"type"` // prometheus, http, tcp, dns
Target string `json:"target"`
Timeout time.Duration `json:"timeout"`
Interval time.Duration `json:"interval"`
FailureThreshold int `json:"failure_threshold"`
SuccessThreshold int `json:"success_threshold"`
Query string `json:"query,omitempty"` // For Prometheus queries
ExpectedStatus int `json:"expected_status,omitempty"` // For HTTP checks
}
type DependencyServiceStatus ¶
type DependencyServiceStatus struct {
ServiceName string `json:"service_name"`
Status DependencyStatus `json:"status"`
ResponseTime time.Duration `json:"response_time"`
LastChecked time.Time `json:"last_checked"`
FailureReason string `json:"failure_reason,omitempty"`
ConsecutiveFails int `json:"consecutive_fails"`
Metadata json.RawMessage `json:"metadata,omitempty"`
}
type DependencyStatus ¶
type DependencyStatus string
const (
DepStatusHealthy DependencyStatus = "healthy"
DepStatusDegraded DependencyStatus = "degraded"
DepStatusUnhealthy DependencyStatus = "unhealthy"
DepStatusUnknown DependencyStatus = "unknown"
DepStatusCircuitOpen DependencyStatus = "circuit_open"
)
type DependencyTracker ¶
type DependencyTracker struct {
// contains filtered or unexported fields
}
func NewDependencyTracker ¶
func NewDependencyTracker(config *DependencyTrackerConfig, kubeClient client.Client, kubeClientset kubernetes.Interface, promClient api.Client) (*DependencyTracker, error)
func (*DependencyTracker) AddDependency ¶
func (dt *DependencyTracker) AddDependency(dep *Dependency) error
func (*DependencyTracker) GetAllDependencyHealth ¶
func (dt *DependencyTracker) GetAllDependencyHealth() map[string]*DependencyHealth
func (*DependencyTracker) GetAvailabilityMetrics ¶
func (dt *DependencyTracker) GetAvailabilityMetrics(dependencyID string) (*AvailabilityMetric, error)
func (*DependencyTracker) GetCascadeAnalysis ¶
func (dt *DependencyTracker) GetCascadeAnalysis(since time.Time) []CascadeFailureAnalysis
func (*DependencyTracker) GetCircuitBreakerStates ¶
func (dt *DependencyTracker) GetCircuitBreakerStates() map[string]*CircuitBreakerState
func (*DependencyTracker) GetDependencyHealth ¶
func (dt *DependencyTracker) GetDependencyHealth(dependencyID string) (*DependencyHealth, bool)
func (*DependencyTracker) Start ¶
func (dt *DependencyTracker) Start() error
func (*DependencyTracker) Stop ¶
func (dt *DependencyTracker) Stop() error
type DependencyTrackerConfig ¶
type DependencyTrackerConfig struct {
MonitoringInterval time.Duration `json:"monitoring_interval"`
HealthCheckTimeout time.Duration `json:"health_check_timeout"`
CascadeAnalysisDepth int `json:"cascade_analysis_depth"`
RetentionPeriod time.Duration `json:"retention_period"`
EnableCircuitBreaker bool `json:"enable_circuit_breaker"`
ServiceMeshEnabled bool `json:"service_mesh_enabled"`
ServiceMeshType string `json:"service_mesh_type"` // istio, linkerd, consul
PrometheusEnabled bool `json:"prometheus_enabled"`
JaegerEnabled bool `json:"jaeger_enabled"`
AlertingEnabled bool `json:"alerting_enabled"`
CriticalDependencies []string `json:"critical_dependencies"`
}
type DependencyType ¶
type DependencyType string
const (
DepTypeDatabase DependencyType = "database"
DepTypeExternalAPI DependencyType = "external_api"
DepTypeInternalService DependencyType = "internal_service"
DepTypeMessageQueue DependencyType = "message_queue"
DepTypeCache DependencyType = "cache"
DepTypeStorage DependencyType = "storage"
DepTypeLLMService DependencyType = "llm_service"
DepTypeK8sAPI DependencyType = "kubernetes_api"
)
type ErrorBudget ¶
type ErrorBudget struct {
Target SLATarget `json:"target"`
TotalTime time.Duration `json:"total_time"`
AllowedDowntime time.Duration `json:"allowed_downtime"`
ActualDowntime time.Duration `json:"actual_downtime"`
RemainingDowntime time.Duration `json:"remaining_downtime"`
BudgetUtilization float64 `json:"budget_utilization"` // 0.0 to 1.0
BurnRate float64 `json:"burn_rate"` // Current consumption rate
TimeToExhaustion time.Duration `json:"time_to_exhaustion"` // Time until budget exhausted
IsExhausted bool `json:"is_exhausted"`
AlertThresholds BudgetAlertThresholds `json:"alert_thresholds"`
}
type ErrorBudgetStatus ¶
type ErrorBudgetStatus struct {
Service string `json:"service"`
Target SLATarget `json:"target"`
TotalBudget time.Duration `json:"total_budget"`
ConsumedBudget time.Duration `json:"consumed_budget"`
RemainingBudget time.Duration `json:"remaining_budget"`
UtilizationPercent float64 `json:"utilization_percent"`
BurnRate float64 `json:"burn_rate"`
IsExhausted bool `json:"is_exhausted"`
AlertLevel string `json:"alert_level"`
}
type ExternalServiceExecutor ¶
type ExternalServiceExecutor struct {
// contains filtered or unexported fields
}
func (*ExternalServiceExecutor) Execute ¶
func (e *ExternalServiceExecutor) Execute(ctx context.Context, check *SyntheticCheck) (*SyntheticResult, error)
func (*ExternalServiceExecutor) Type ¶
func (e *ExternalServiceExecutor) Type() SyntheticCheckType
type FailureMode ¶
type FailureMode string
const (
FailureModeHardFail FailureMode = "hard_fail" // Service cannot function
FailureModeSoftFail FailureMode = "soft_fail" // Service degrades gracefully
FailureModeCircuitBreak FailureMode = "circuit_break" // Circuit breaker protects
FailureModeRetry FailureMode = "retry" // Automatic retry logic
)
type FailureProbability ¶
type FailureProbability struct {
Component string `json:"component"`
Service string `json:"service"`
Probability float64 `json:"probability"` // 0-1
TimeFrame string `json:"time_frame"` // next_hour, next_day, next_week
Impact BusinessImpact `json:"impact"`
}
type HTTPCheckExecutor ¶
type HTTPCheckExecutor struct {
// contains filtered or unexported fields
}
func (*HTTPCheckExecutor) Execute ¶
func (e *HTTPCheckExecutor) Execute(ctx context.Context, check *SyntheticCheck) (*SyntheticResult, error)
func (*HTTPCheckExecutor) Type ¶
func (e *HTTPCheckExecutor) Type() SyntheticCheckType
type HealthCheckResult ¶
type IncidentSummary ¶
type IncidentSummary struct {
ID string `json:"id"`
Title string `json:"title"`
Severity string `json:"severity"`
StartTime time.Time `json:"start_time"`
EndTime *time.Time `json:"end_time,omitempty"`
Duration time.Duration `json:"duration"`
Status string `json:"status"`
AffectedServices []string `json:"affected_services"`
BusinessImpact BusinessImpact `json:"business_impact"`
RootCause string `json:"root_cause"`
Resolution string `json:"resolution"`
Postmortem string `json:"postmortem"`
}
type IntentFlowExecutor ¶
type IntentFlowExecutor struct {
// contains filtered or unexported fields
}
func (*IntentFlowExecutor) Execute ¶
func (e *IntentFlowExecutor) Execute(ctx context.Context, check *SyntheticCheck) (*SyntheticResult, error)
func (*IntentFlowExecutor) Type ¶
func (e *IntentFlowExecutor) Type() SyntheticCheckType
type IntentFlowStep ¶
type IntentFlowStep struct {
Name string `json:"name"`
Action string `json:"action"` // create_intent, check_status, validate_deployment
Payload interface{} `json:"payload"`
ExpectedStatus string `json:"expected_status"`
MaxWaitTime time.Duration `json:"max_wait_time"`
ValidationRules []monitoring.ValidationRule `json:"validation_rules"`
}
type LiveDashboardUpdater ¶
type LiveDashboardUpdater struct {
// contains filtered or unexported fields
}
type MaintenanceWindow ¶
type MaintenanceWindow struct {
ID string `json:"id"`
Name string `json:"name"`
StartTime time.Time `json:"start_time"`
EndTime time.Time `json:"end_time"`
Recurring bool `json:"recurring"`
RecurrenceRule string `json:"recurrence_rule,omitempty"` // RRULE format
EntityFilter map[string]string `json:"entity_filter"` // Filter which entities this applies to
ExcludeFromSLA bool `json:"exclude_from_sla"`
}
type MetricCollector ¶
type MetricCollector interface {
Collect(ctx context.Context) ([]*AvailabilityMetric, error)
Name() string
Dimension() AvailabilityDimension
}
type MultiDimensionalTracker ¶
type MultiDimensionalTracker struct {
// contains filtered or unexported fields
}
func NewMultiDimensionalTracker ¶
func NewMultiDimensionalTracker(
config *TrackerConfig,
kubeClient client.Client,
kubeClientset kubernetes.Interface,
promClient api.Client,
cache cache.Cache,
) (*MultiDimensionalTracker, error)
func (*MultiDimensionalTracker) GetCurrentState ¶
func (t *MultiDimensionalTracker) GetCurrentState() *AvailabilityState
func (*MultiDimensionalTracker) GetMetricsByBusinessImpact ¶
func (t *MultiDimensionalTracker) GetMetricsByBusinessImpact(impact BusinessImpact) []*AvailabilityMetric
func (*MultiDimensionalTracker) GetMetricsByDimension ¶
func (t *MultiDimensionalTracker) GetMetricsByDimension(dimension AvailabilityDimension) []*AvailabilityMetric
func (*MultiDimensionalTracker) GetMetricsHistory ¶
func (t *MultiDimensionalTracker) GetMetricsHistory(since, until time.Time) []AvailabilityMetric
func (*MultiDimensionalTracker) Start ¶
func (t *MultiDimensionalTracker) Start() error
func (*MultiDimensionalTracker) Stop ¶
func (t *MultiDimensionalTracker) Stop() error
type PanelPosition ¶
type PredictiveInsights ¶
type PredictiveInsights struct {
PredictedAvailability float64 `json:"predicted_availability"`
PredictionConfidence float64 `json:"prediction_confidence"`
RiskFactors []RiskFactor `json:"risk_factors"`
RecommendedActions []string `json:"recommended_actions"`
CapacityForecast CapacityForecast `json:"capacity_forecast"`
FailureProbabilities []FailureProbability `json:"failure_probabilities"`
}
type ReportFormat ¶
type ReportFormat string
const (
FormatJSON ReportFormat = "json"
FormatHTML ReportFormat = "html"
FormatCSV ReportFormat = "csv"
FormatPDF ReportFormat = "pdf"
FormatPrometheus ReportFormat = "prometheus"
FormatDashboard ReportFormat = "dashboard"
)
type ReportType ¶
type ReportType string
const (
ReportTypeLive ReportType = "live" // Real-time status
ReportTypeHistorical ReportType = "historical" // Historical analysis
ReportTypeSLA ReportType = "sla" // SLA compliance
ReportTypeIncident ReportType = "incident" // Incident correlation
ReportTypeCompliance ReportType = "compliance" // Audit and compliance
ReportTypeTrend ReportType = "trend" // Trend analysis
)
type ReporterConfig ¶
type ReporterConfig struct {
DefaultTimeWindow TimeWindow `json:"default_time_window"`
RefreshInterval time.Duration `json:"refresh_interval"`
RetentionPeriod time.Duration `json:"retention_period"`
SLATargets []SLATargetConfig `json:"sla_targets"`
BusinessHours BusinessHours `json:"business_hours"`
PlannedMaintenance []MaintenanceWindow `json:"planned_maintenance"`
DashboardURL string `json:"dashboard_url"`
PrometheusURL string `json:"prometheus_url"`
GrafanaURL string `json:"grafana_url"`
AlertWebhookURL string `json:"alert_webhook_url"`
AlertThresholds AlertThresholds `json:"alert_thresholds"`
ExportFormats []ReportFormat `json:"export_formats"`
S3BucketName string `json:"s3_bucket_name"`
ArchiveEnabled bool `json:"archive_enabled"`
}
type RiskFactor ¶
type SLAComplianceReport ¶
type SLAComplianceReport struct {
Target SLATarget `json:"target"`
CurrentAvailability float64 `json:"current_availability"`
RequiredAvailability float64 `json:"required_availability"`
Status ComplianceStatus `json:"status"`
ErrorBudget ErrorBudgetStatus `json:"error_budget"`
RemainingBudget time.Duration `json:"remaining_budget"`
BudgetBurnRate float64 `json:"budget_burn_rate"`
ProjectedExhaustion *time.Time `json:"projected_exhaustion,omitempty"`
ComplianceHistory []CompliancePoint `json:"compliance_history"`
}
type SLAComplianceStatus ¶
type SLAComplianceStatus struct {
EntityID string `json:"entity_id"`
EntityType string `json:"entity_type"`
TimeWindow TimeWindow `json:"time_window"`
SLATarget SLATarget `json:"sla_target"`
CurrentAvailability float64 `json:"current_availability"`
RequiredAvailability float64 `json:"required_availability"`
IsCompliant bool `json:"is_compliant"`
ComplianceLevel string `json:"compliance_level"` // healthy, warning, critical, breach
ErrorBudget ErrorBudget `json:"error_budget"`
LastUpdate time.Time `json:"last_update"`
}
type SLARequirements ¶
type SLARequirements struct {
Availability float64 `json:"availability"` // 0.0 to 1.0
ResponseTime time.Duration `json:"response_time"` // P95 response time requirement
ErrorRate float64 `json:"error_rate"` // Maximum error rate (0.0 to 1.0)
MTTR time.Duration `json:"mttr"` // Mean Time To Recovery
MTBF time.Duration `json:"mtbf"` // Mean Time Between Failures
}
type SLATarget ¶
type SLATarget string
const (
SLA99_95 SLATarget = "99.95" // 4.38 hours/year, 21.92 minutes/month
SLA99_9 SLATarget = "99.9" // 8.77 hours/year, 43.83 minutes/month
SLA99_5 SLATarget = "99.5" // 43.83 hours/year, 3.65 hours/month
SLA99 SLATarget = "99" // 87.66 hours/year, 7.31 hours/month
SLA95 SLATarget = "95" // 438.3 hours/year, 36.53 hours/month
)
type SLATargetConfig ¶
type SLATargetConfig struct {
Service string `json:"service"`
Target SLATarget `json:"target"`
BusinessImpact BusinessImpact `json:"business_impact"`
Enabled bool `json:"enabled"`
}
type SeasonalPattern ¶
type SeasonalPattern struct {
Pattern string `json:"pattern"` // daily, weekly, monthly
PeakTime string `json:"peak_time"` // When issues are most common
LowTime string `json:"low_time"` // When issues are least common
Variation float64 `json:"variation"` // Availability variation %
Confidence float64 `json:"confidence"` // Pattern confidence 0-1
}
type ServiceAvailability ¶
type ServiceAvailability struct {
ServiceName string `json:"service_name"`
ServiceType string `json:"service_type"`
Layer ServiceLayer `json:"layer"`
BusinessImpact BusinessImpact `json:"business_impact"`
Availability float64 `json:"availability"`
Uptime time.Duration `json:"uptime"`
Downtime time.Duration `json:"downtime"`
IncidentCount int `json:"incident_count"`
AverageResponseTime time.Duration `json:"average_response_time"`
ErrorRate float64 `json:"error_rate"`
HealthStatus monitoring.HealthStatus `json:"health_status"`
LastIncident *time.Time `json:"last_incident,omitempty"`
RecoveryTime time.Duration `json:"recovery_time"`
}
type ServiceDependency ¶
type ServiceEndpointConfig ¶
type ServiceEndpointConfig struct {
Name string `json:"name"`
URL string `json:"url"`
Method string `json:"method"`
ExpectedStatus int `json:"expected_status"`
Timeout time.Duration `json:"timeout"`
BusinessImpact BusinessImpact `json:"business_impact"`
Layer ServiceLayer `json:"layer"`
SLAThreshold time.Duration `json:"sla_threshold"`
}
type ServiceLayer ¶
type ServiceLayer string
const (
LayerAPI ServiceLayer = "api"
LayerController ServiceLayer = "controller"
LayerProcessor ServiceLayer = "processor"
LayerStorage ServiceLayer = "storage"
LayerExternal ServiceLayer = "external"
)
type ServiceLayerCollector ¶
type ServiceLayerCollector struct {
// contains filtered or unexported fields
}
func NewServiceLayerCollector ¶
func NewServiceLayerCollector(endpoints []ServiceEndpointConfig, promClient v1.API) (*ServiceLayerCollector, error)
func (*ServiceLayerCollector) Collect ¶
func (slc *ServiceLayerCollector) Collect(ctx context.Context) ([]*AvailabilityMetric, error)
func (*ServiceLayerCollector) Dimension ¶
func (slc *ServiceLayerCollector) Dimension() AvailabilityDimension
func (*ServiceLayerCollector) Name ¶
func (slc *ServiceLayerCollector) Name() string
type StepResult ¶
type StepResult struct {
StepName string `json:"step_name"`
Status SyntheticCheckStatus `json:"status"`
ResponseTime time.Duration `json:"response_time"`
Error string `json:"error,omitempty"`
Output interface{} `json:"output,omitempty"`
}
type SyntheticCheck ¶
type SyntheticCheck struct {
ID string `json:"id"`
Name string `json:"name"`
Type SyntheticCheckType `json:"type"`
Enabled bool `json:"enabled"`
Interval time.Duration `json:"interval"`
Timeout time.Duration `json:"timeout"`
RetryCount int `json:"retry_count"`
RetryDelay time.Duration `json:"retry_delay"`
BusinessImpact BusinessImpact `json:"business_impact"`
Region string `json:"region"`
Tags map[string]string `json:"tags"`
Config CheckConfig `json:"config"`
AlertThresholds AlertThresholds `json:"alert_thresholds"`
}
type SyntheticCheckStatus ¶
type SyntheticCheckStatus string
const (
CheckStatusPass SyntheticCheckStatus = "pass"
CheckStatusFail SyntheticCheckStatus = "fail"
CheckStatusTimeout SyntheticCheckStatus = "timeout"
CheckStatusError SyntheticCheckStatus = "error"
)
type SyntheticCheckType ¶
type SyntheticCheckType string
const (
CheckTypeHTTP SyntheticCheckType = "http"
CheckTypeIntentFlow SyntheticCheckType = "intent_flow"
CheckTypeDatabase SyntheticCheckType = "database"
CheckTypeExternal SyntheticCheckType = "external"
CheckTypeChaos SyntheticCheckType = "chaos"
)
type SyntheticMonitor ¶
type SyntheticMonitor struct {
// contains filtered or unexported fields
}
func NewSyntheticMonitor ¶
func NewSyntheticMonitor(config *SyntheticMonitorConfig, promClient api.Client, alertManager AlertManager) (*SyntheticMonitor, error)
func (*SyntheticMonitor) AddCheck ¶
func (sm *SyntheticMonitor) AddCheck(check *SyntheticCheck) error
func (*SyntheticMonitor) GetAvailabilityMetrics ¶
func (sm *SyntheticMonitor) GetAvailabilityMetrics(checkID string, since, until time.Time) (*AvailabilityMetric, error)
func (*SyntheticMonitor) GetCheck ¶
func (sm *SyntheticMonitor) GetCheck(checkID string) (*SyntheticCheck, bool)
func (*SyntheticMonitor) GetResults ¶
func (sm *SyntheticMonitor) GetResults(since, until time.Time) []SyntheticResult
func (*SyntheticMonitor) GetResultsByCheck ¶
func (sm *SyntheticMonitor) GetResultsByCheck(checkID string, since, until time.Time) []SyntheticResult
func (*SyntheticMonitor) ListChecks ¶
func (sm *SyntheticMonitor) ListChecks() []*SyntheticCheck
func (*SyntheticMonitor) RemoveCheck ¶
func (sm *SyntheticMonitor) RemoveCheck(checkID string) error
func (*SyntheticMonitor) Start ¶
func (sm *SyntheticMonitor) Start() error
func (*SyntheticMonitor) Stop ¶
func (sm *SyntheticMonitor) Stop() error
type SyntheticMonitorConfig ¶
type SyntheticMonitorConfig struct {
MaxConcurrentChecks int `json:"max_concurrent_checks"`
DefaultTimeout time.Duration `json:"default_timeout"`
DefaultRetryCount int `json:"default_retry_count"`
DefaultRetryDelay time.Duration `json:"default_retry_delay"`
ResultRetention time.Duration `json:"result_retention"`
RegionID string `json:"region_id"`
EnableChaosTests bool `json:"enable_chaos_tests"`
ChaosTestInterval time.Duration `json:"chaos_test_interval"`
HTTPTimeout time.Duration `json:"http_timeout"`
HTTPMaxIdleConns int `json:"http_max_idle_conns"`
HTTPMaxConnsPerHost int `json:"http_max_conns_per_host"`
HTTPSkipTLS bool `json:"http_skip_tls"`
IntentAPIEndpoint string `json:"intent_api_endpoint"`
IntentAPIToken string `json:"intent_api_token"`
AlertingEnabled bool `json:"alerting_enabled"`
AlertWebhookURL string `json:"alert_webhook_url"`
AlertRetention time.Duration `json:"alert_retention"`
}
type SyntheticResult ¶
type SyntheticResult struct {
CheckID string `json:"check_id"`
CheckName string `json:"check_name"`
Timestamp time.Time `json:"timestamp"`
Status SyntheticCheckStatus `json:"status"`
ResponseTime time.Duration `json:"response_time"`
Error string `json:"error,omitempty"`
Region string `json:"region"`
HTTPStatus int `json:"http_status,omitempty"`
StepResults []StepResult `json:"step_results,omitempty"`
Metadata json.RawMessage `json:"metadata,omitempty"`
}
type TimeWindow ¶
type TimeWindow string
const (
Window1Minute TimeWindow = "1m"
Window5Minutes TimeWindow = "5m"
Window1Hour TimeWindow = "1h"
Window1Day TimeWindow = "1d"
Window1Week TimeWindow = "7d"
Window1Month TimeWindow = "30d"
)
type TrackerConfig ¶
type TrackerConfig struct {
ServiceEndpoints []ServiceEndpointConfig `json:"service_endpoints"`
Components []ComponentConfig `json:"components"`
UserJourneys []UserJourneyConfig `json:"user_journeys"`
DegradedThreshold time.Duration `json:"degraded_threshold"` // Response time threshold for degraded state
UnhealthyThreshold time.Duration `json:"unhealthy_threshold"` // Response time threshold for unhealthy state
ErrorRateThreshold float64 `json:"error_rate_threshold"` // Error rate threshold for degraded state
CollectionInterval time.Duration `json:"collection_interval"` // How often to collect metrics
RetentionPeriod time.Duration `json:"retention_period"` // How long to retain metrics
KubernetesNamespace string `json:"kubernetes_namespace"`
PrometheusURL string `json:"prometheus_url"`
}
type TrendAnalysis ¶
type TrendAnalysis struct {
TimeWindow TimeWindow `json:"time_window"`
AvailabilityTrend float64 `json:"availability_trend"` // Positive = improving
PerformanceTrend float64 `json:"performance_trend"` // Positive = improving
IncidentTrend float64 `json:"incident_trend"` // Negative = improving
ErrorRateTrend float64 `json:"error_rate_trend"` // Negative = improving
PeakUsageHours []int `json:"peak_usage_hours"`
MostReliableDay string `json:"most_reliable_day"`
LeastReliableDay string `json:"least_reliable_day"`
SeasonalPatterns []SeasonalPattern `json:"seasonal_patterns"`
PerformanceCorrelation float64 `json:"performance_correlation"` // With availability
LoadCorrelation float64 `json:"load_correlation"` // With availability
}
type UserJourneyAvailability ¶
type UserJourneyAvailability struct {
JourneyName string `json:"journey_name"`
BusinessImpact BusinessImpact `json:"business_impact"`
Availability float64 `json:"availability"`
SuccessRate float64 `json:"success_rate"`
AverageCompletionTime time.Duration `json:"average_completion_time"`
StepFailures map[string]int `json:"step_failures"`
HealthStatus monitoring.HealthStatus `json:"health_status"`
TotalExecutions int64 `json:"total_executions"`
FailedExecutions int64 `json:"failed_executions"`
}
type UserJourneyCollector ¶
type UserJourneyCollector struct {
// contains filtered or unexported fields
}
func NewUserJourneyCollector ¶
func NewUserJourneyCollector(journeys []UserJourneyConfig, promClient v1.API) (*UserJourneyCollector, error)
func (*UserJourneyCollector) Collect ¶
func (ujc *UserJourneyCollector) Collect(ctx context.Context) ([]*AvailabilityMetric, error)
func (*UserJourneyCollector) Dimension ¶
func (ujc *UserJourneyCollector) Dimension() AvailabilityDimension
func (*UserJourneyCollector) Name ¶
func (ujc *UserJourneyCollector) Name() string
type UserJourneyConfig ¶
type UserJourneyConfig struct {
Name string `json:"name"`
Steps []UserJourneyStep `json:"steps"`
BusinessImpact BusinessImpact `json:"business_impact"`
SLAThreshold time.Duration `json:"sla_threshold"`
Metadata json.RawMessage `json:"metadata"`
}
type UserJourneyStep ¶
type UserJourneyStep struct {
Name string `json:"name"`
Type string `json:"type"` // api_call, database_query, external_service
Target string `json:"target"`
Timeout time.Duration `json:"timeout"`
Required bool `json:"required"`
Weight float64 `json:"weight"` // Weight for calculating journey health
}
Click to show internal directories.
Click to hide internal directories.