monitoring

package
v0.2.0 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 21, 2026 | License: MIT | Imports: 4 | Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func GetSeverityColor

func GetSeverityColor(severity AlertSeverity) string

GetSeverityColor returns the display color for the given alert severity.

func GetSeverityIcon

func GetSeverityIcon(severity AlertSeverity) string

GetSeverityIcon returns an icon for alert severity

Types

type Alert

// Alert represents a cluster alert.
type Alert struct {
	// ID is the alert's identifier; AlertManager looks alerts up by an id string.
	ID           string
	// Type is the alert category; see the AlertType constants.
	Type         AlertType
	// Severity is the alert's severity level; see the AlertSeverity constants.
	Severity     AlertSeverity
	// Title and Message are presumably the short and detailed human-readable
	// texts for the alert — TODO confirm how each is displayed.
	Title        string
	Message      string
	// Component presumably names the cluster component the alert concerns.
	Component    string
	// Timestamp is presumably when the alert was first raised — TODO confirm.
	Timestamp    time.Time
	// Acknowledged, AckedBy and AckedAt hold acknowledgement state; presumably
	// filled in by AlertManager.AcknowledgeAlert(id, ackedBy). AckedAt is a
	// pointer and so may be nil (likely until the alert is acknowledged).
	Acknowledged bool
	AckedBy      string
	AckedAt      *time.Time
	// Resolved and ResolvedAt hold resolution state; presumably filled in by
	// AlertManager.ResolveAlert. ResolvedAt is a pointer and so may be nil.
	Resolved     bool
	ResolvedAt   *time.Time
	// Count and LastSeen presumably track repeat occurrences when AddAlert
	// updates an existing alert — verify against AddAlert.
	Count        int
	LastSeen     time.Time
	// Metadata carries arbitrary string-keyed values attached to the alert.
	Metadata     map[string]interface{}
}

Alert represents a cluster alert

type AlertFilter

// AlertFilter defines criteria for filtering alerts; it is the parameter of
// AlertManager.GetAlerts.
//
// NOTE(review): the matching rules are not visible here. Presumably an empty
// slice or nil pointer means "no constraint on this field" — confirm against
// GetAlerts before relying on it.
type AlertFilter struct {
	// Types, Severities and Components list the accepted values for the
	// corresponding Alert fields.
	Types        []AlertType
	Severities   []AlertSeverity
	Components   []string
	// Acknowledged and Resolved are pointers so that "unset" (nil) can be
	// told apart from an explicit true or false.
	Acknowledged *bool
	Resolved     *bool
	// SinceTime and UntilTime presumably bound a time window; which Alert
	// time field they are compared with, and whether the bounds are
	// inclusive, is not shown here — TODO confirm.
	SinceTime    *time.Time
	UntilTime    *time.Time
}

AlertFilter defines criteria for filtering alerts

type AlertListener

// AlertListener defines the interface for alert listeners. Listeners are
// registered and unregistered with AlertManager.AddListener and
// AlertManager.RemoveListener.
type AlertListener interface {
	// OnAlert receives an alert; presumably invoked when AlertManager.AddAlert
	// records one — TODO confirm whether updates to an existing alert also fire.
	OnAlert(alert *Alert)
	// OnAlertResolved receives an alert that has been resolved; presumably
	// invoked from AlertManager.ResolveAlert — TODO confirm.
	OnAlertResolved(alert *Alert)
}

AlertListener defines the interface for alert listeners

type AlertManager

type AlertManager struct {
	// contains filtered or unexported fields
}

AlertManager manages cluster alerts

func NewAlertManager

func NewAlertManager() *AlertManager

NewAlertManager creates a new alert manager

func (*AlertManager) AcknowledgeAlert

func (am *AlertManager) AcknowledgeAlert(id, ackedBy string) error

AcknowledgeAlert marks an alert as acknowledged

func (*AlertManager) AddAlert

func (am *AlertManager) AddAlert(alert *Alert)

AddAlert adds a new alert or updates an existing one

func (*AlertManager) AddListener

func (am *AlertManager) AddListener(listener AlertListener)

AddListener adds an alert listener

func (*AlertManager) ClearResolvedAlerts

func (am *AlertManager) ClearResolvedAlerts(olderThan time.Duration) int

ClearResolvedAlerts removes all resolved alerts older than the specified duration

func (*AlertManager) GetActiveAlerts

func (am *AlertManager) GetActiveAlerts() []*Alert

GetActiveAlerts returns all unresolved alerts

func (*AlertManager) GetAlert

func (am *AlertManager) GetAlert(id string) *Alert

GetAlert retrieves an alert by ID

func (*AlertManager) GetAlerts

func (am *AlertManager) GetAlerts(filter AlertFilter) []*Alert

GetAlerts returns all alerts, optionally narrowed by the given AlertFilter.

func (*AlertManager) GetCriticalAlerts

func (am *AlertManager) GetCriticalAlerts() []*Alert

GetCriticalAlerts returns all critical alerts

func (*AlertManager) GetStats

func (am *AlertManager) GetStats() AlertStats

GetStats returns statistics about alerts

func (*AlertManager) RemoveListener

func (am *AlertManager) RemoveListener(listener AlertListener)

RemoveListener removes an alert listener

func (*AlertManager) ResolveAlert

func (am *AlertManager) ResolveAlert(id string) error

ResolveAlert marks an alert as resolved

type AlertSeverity

type AlertSeverity string

AlertSeverity represents the severity level of an alert

// Severity levels an Alert can carry. The underlying values are the lowercase
// strings shown here.
const (
	AlertSeverityInfo     AlertSeverity = "info"
	AlertSeverityWarning  AlertSeverity = "warning"
	AlertSeverityCritical AlertSeverity = "critical"
)

type AlertStats

// AlertStats provides statistics about alerts; it is the return type of
// AlertManager.GetStats.
//
// NOTE(review): each field is presumably a count of alerts in the named
// category. Whether the per-severity and acknowledgement counts include
// resolved alerts is not visible here — confirm against GetStats.
type AlertStats struct {
	// Total is presumably the number of alerts counted overall.
	Total          int
	// Critical, Warning and Info mirror the three AlertSeverity constants.
	Critical       int
	Warning        int
	Info           int
	// Acknowledged and Unacknowledged mirror Alert.Acknowledged.
	Acknowledged   int
	Unacknowledged int
	// Active and Resolved mirror Alert.Resolved (GetActiveAlerts is documented
	// as returning the unresolved alerts).
	Active         int
	Resolved       int
}

AlertStats provides statistics about alerts

type AlertType

type AlertType string

AlertType represents the type of alert

// Alert categories an Alert can be tagged with via Alert.Type. The underlying
// values are the lowercase strings shown here.
const (
	AlertTypeHealth      AlertType = "health"
	AlertTypePerformance AlertType = "performance"
	AlertTypeResource    AlertType = "resource"
	AlertTypeJob         AlertType = "job"
	AlertTypeNode        AlertType = "node"
	AlertTypeSystem      AlertType = "system"
)

type ClusterHealth

// ClusterHealth represents the overall cluster health; it is what
// HealthMonitor.GetHealth returns.
type ClusterHealth struct {
	// OverallStatus is the cluster-wide status; see the HealthStatus constants.
	// How it is derived from the individual checks is not shown here.
	OverallStatus HealthStatus
	// Checks maps a string key to a health check result; the key is presumably
	// the check's Name — TODO confirm.
	Checks        map[string]*HealthCheck
	// Issues lists the health issues recorded for the cluster.
	Issues        []HealthIssue
	// LastUpdated is presumably when this snapshot was last refreshed.
	LastUpdated   time.Time
	// contains filtered or unexported fields
}

ClusterHealth represents the overall cluster health

type HealthCheck

// HealthCheck represents a single health check. A HealthCheckFunc returns a
// pointer to one of these.
type HealthCheck struct {
	// Name and Description identify and describe the check.
	Name        string
	Description string
	// Status is the check's result; see the HealthStatus constants.
	Status      HealthStatus
	// Message is presumably a human-readable explanation of Status.
	Message     string
	// LastCheck and CheckCount presumably record when the check last ran and
	// how many times it has run — TODO confirm who updates them.
	LastCheck   time.Time
	CheckCount  int
	// Threshold holds the warning/critical bounds associated with this check.
	Threshold   HealthThreshold
}

HealthCheck represents a single health check

type HealthCheckFunc

type HealthCheckFunc func(client dao.SlurmClient) *HealthCheck

HealthCheckFunc defines a function that performs a health check

type HealthIssue

// HealthIssue represents a specific health issue, as listed in
// ClusterHealth.Issues.
type HealthIssue struct {
	// ID identifies the issue.
	ID          string
	// Component presumably names the cluster component affected.
	Component   string
	// Severity is expressed as a HealthStatus (not an AlertSeverity).
	Severity    HealthStatus
	// Title and Description are presumably the short and detailed
	// human-readable texts for the issue.
	Title       string
	Description string
	// FirstSeen, LastSeen and Count presumably track when the issue was first
	// and most recently observed and how many times — TODO confirm.
	FirstSeen   time.Time
	LastSeen    time.Time
	Count       int
	// Resolved reports whether the issue has been marked resolved.
	Resolved    bool
}

HealthIssue represents a specific health issue

type HealthMonitor

type HealthMonitor struct {
	// contains filtered or unexported fields
}

HealthMonitor monitors cluster health and generates alerts

func NewHealthMonitor

func NewHealthMonitor(client dao.SlurmClient, interval time.Duration) *HealthMonitor

NewHealthMonitor creates a new health monitor

func (*HealthMonitor) GetAlertManager

func (hm *HealthMonitor) GetAlertManager() *AlertManager

GetAlertManager returns the alert manager

func (*HealthMonitor) GetHealth

func (hm *HealthMonitor) GetHealth() *ClusterHealth

GetHealth returns the current cluster health

func (*HealthMonitor) Start

func (hm *HealthMonitor) Start()

Start begins health monitoring

func (*HealthMonitor) Stop

func (hm *HealthMonitor) Stop()

Stop stops health monitoring

type HealthStatus

type HealthStatus string

HealthStatus represents the health status of a cluster component

// Health states a cluster component can be in. The underlying values are the
// lowercase strings shown here.
const (
	HealthStatusHealthy  HealthStatus = "healthy"
	HealthStatusWarning  HealthStatus = "warning"
	HealthStatusCritical HealthStatus = "critical"
	HealthStatusUnknown  HealthStatus = "unknown"
)

type HealthThreshold

// HealthThreshold defines warning and critical thresholds for metrics.
//
// Every bound is a *float64, so each one can be left nil.
// NOTE(review): presumably a nil bound means "not enforced", and a metric
// below a Min or above a Max triggers the matching status — the comparison
// logic and inclusivity are not visible here; confirm against the checks.
type HealthThreshold struct {
	// WarningMin and WarningMax bound the warning range.
	WarningMin  *float64
	WarningMax  *float64
	// CriticalMin and CriticalMax bound the critical range.
	CriticalMin *float64
	CriticalMax *float64
}

HealthThreshold defines warning and critical thresholds for metrics

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL