interruption

package
v1.12.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 24, 2026 License: Apache-2.0 Imports: 42 Imported by: 1

Documentation

Index

Constants

This section is empty.

Variables

View Source
// Prometheus collectors for the interruption package. Each one is constructed
// with crmetrics.Registry and shares metrics.Namespace and interruptionSubsystem.
var (
	// ReceivedMessages counts messages received from the SQS queue, labeled by
	// messageTypeLabel.
	// NOTE(review): the Help text also promises a breakdown by whether the
	// message was actionable, but messageTypeLabel is the only label declared
	// here — confirm which of the two is intended.
	ReceivedMessages = opmetrics.NewPrometheusCounter(
		crmetrics.Registry,
		prometheus.CounterOpts{
			Namespace: metrics.Namespace,
			Subsystem: interruptionSubsystem,
			Name:      "received_messages_total",
			Help:      "Count of messages received from the SQS queue. Broken down by message type and whether the message was actionable.",
		},
		[]string{messageTypeLabel},
	)
	// DeletedMessages counts messages deleted from the SQS queue. It declares
	// no labels.
	DeletedMessages = opmetrics.NewPrometheusCounter(
		crmetrics.Registry,
		prometheus.CounterOpts{
			Namespace: metrics.Namespace,
			Subsystem: interruptionSubsystem,
			Name:      "deleted_messages_total",
			Help:      "Count of messages deleted from the SQS queue.",
		},
		[]string{},
	)
	// MessageLatency is a histogram of how long an interruption message sits on
	// the queue before it is processed. Buckets come from
	// metrics.DurationBuckets(); it declares no labels.
	MessageLatency = opmetrics.NewPrometheusHistogram(
		crmetrics.Registry,
		prometheus.HistogramOpts{
			Namespace: metrics.Namespace,
			Subsystem: interruptionSubsystem,
			Name:      "message_queue_duration_seconds",
			Help:      "Amount of time an interruption message is on the queue before it is processed by karpenter.",
			Buckets:   metrics.DurationBuckets(),
		},
		[]string{},
	)
	// InstanceStatusUnhealthy counts unhealthy instance statuses detected from
	// EC2 DescribeInstanceStatus, labeled by categoryLabel (the status check
	// category, per the Help text).
	InstanceStatusUnhealthy = opmetrics.NewPrometheusCounter(
		crmetrics.Registry,
		prometheus.CounterOpts{
			Namespace: metrics.Namespace,
			Subsystem: interruptionSubsystem,
			Name:      "instance_status_unhealthy_total",
			Help:      "Count of unhealthy instance statuses detected from EC2 DescribeInstanceStatus. Broken down by status check category.",
		},
		[]string{categoryLabel},
	)
)
View Source
var (
	// InstanceStatusInterval is the polling interval for the EC2 DescribeInstanceStatus API.
	// It is initialized to one minute.
	InstanceStatusInterval = 1 * time.Minute
)

Functions

This section is empty.

Types

type Action

// Action is a string-typed identifier with two declared values, CordonAndDrain
// and NoAction.
// NOTE(review): presumably the response chosen for an interruption message —
// confirm against the handler that consumes it.
type Action string
const (
	// CordonAndDrain presumably requests that the affected node be cordoned and
	// drained — TODO confirm.
	CordonAndDrain Action = "CordonAndDrain"
	// NoAction presumably indicates that nothing should be done — TODO confirm.
	NoAction       Action = "NoAction"
)

type Controller

// Controller is the AWS interruption controller. It continually polls an SQS
// queue for aws.ec2 and aws.health events: node health events, spot
// interruption/rebalance events, and capacity reservation interruptions.
type Controller struct {
	// InterruptionHandler is embedded, so its exported methods are promoted to
	// Controller.
	InterruptionHandler
	// contains filtered or unexported fields
}

Controller is an AWS interruption controller. It continually polls an SQS queue for events from aws.ec2 and aws.health that trigger node health events, spot interruption/rebalance events, and capacity reservation interruptions.

func NewController

func NewController(
	kubeClient client.Client,
	cloudProvider cloudprovider.CloudProvider,
	recorder events.Recorder,
	sqsProvider sqs.Provider,
	sqsAPI *sqsapi.Client,
	unavailableOfferingsCache *cache.UnavailableOfferings,
	capacityReservationProvider capacityreservation.Provider,
) *Controller

func (*Controller) Reconcile

func (c *Controller) Reconcile(ctx context.Context) (reconciler.Result, error)

func (*Controller) Register added in v0.37.0

func (c *Controller) Register(_ context.Context, m manager.Manager) error

type EventParser

// EventParser is constructed by NewEventParser from a variadic list of
// messages.Parser values. Its Parse method takes a raw message string and
// returns a messages.Message or an error.
type EventParser struct {
	// contains filtered or unexported fields
}

func NewEventParser

func NewEventParser(parsers ...messages.Parser) *EventParser

func (EventParser) Parse

func (p EventParser) Parse(msg string) (messages.Message, error)

type InstanceStatusController added in v1.12.0

// InstanceStatusController polls EC2 DescribeInstanceStatus to detect unhealthy
// instances and scheduled maintenance events, then cordons and drains the
// affected nodes.
type InstanceStatusController struct {
	// InterruptionHandler is embedded, so its exported methods are promoted to
	// InstanceStatusController.
	InterruptionHandler
	// contains filtered or unexported fields
}

InstanceStatusController polls EC2 DescribeInstanceStatus to detect unhealthy instances and scheduled maintenance events, then cordons and drains affected nodes.

func NewInstanceStatusController added in v1.12.0

func NewInstanceStatusController(
	kubeClient client.Client,
	cloudProvider cloudprovider.CloudProvider,
	recorder events.Recorder,
	instanceStatusProvider instancestatus.Provider,
) *InstanceStatusController

func (*InstanceStatusController) Reconcile added in v1.12.0

func (*InstanceStatusController) Register added in v1.12.0

type InterruptionHandler added in v1.12.0

// InterruptionHandler holds the logic shared between the two sources of
// interruption messages: the SQS queue and the DescribeInstanceStatus API.
type InterruptionHandler struct {
	// contains filtered or unexported fields
}

InterruptionHandler contains shared logic for handling interruption messages from both the SQS queue and the DescribeInstanceStatus API.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL