scaler

package

v0.4.1 Latest Latest Go to latest Published: Aug 19, 2025 License: Apache-2.0 Imports: 18 Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/vllm-project/aibrix

Links

Open Source Insights

Documentation ¶

Index ¶

Constants
func CalculatePodRequests(pods []*v1.Pod, container string, resource v1.ResourceName) (map[string]int64, error)
func GetReadyPodsCount(ctx context.Context, podLister client.Client, namespace string, ...) (int64, error)
func GroupPods(pods []*v1.Pod, metrics metrics.PodMetricsInfo, resource v1.ResourceName, ...) (readyPodCount int, unreadyPods, missingPods, ignoredPods sets.Set[string])
func RemoveMetricsForPods(metrics metrics.PodMetricsInfo, pods sets.Set[string])
type ApaAutoscaler
- func NewApaAutoscaler(readyPodsCount int, pa *autoscalingv1alpha1.PodAutoscaler) (*ApaAutoscaler, error)
- func (a *ApaAutoscaler) GetScalingContext() common.ScalingContext
- func (a *ApaAutoscaler) Scale(originalReadyPodsCount int, metricKey metrics.NamespaceNameMetric, ...) ScaleResult
- func (a *ApaAutoscaler) UpdateScaleTargetMetrics(ctx context.Context, metricKey metrics.NamespaceNameMetric, ...) error
- func (a *ApaAutoscaler) UpdateScalingContext(pa autoscalingv1alpha1.PodAutoscaler) error
- func (a *ApaAutoscaler) UpdateSourceMetrics(ctx context.Context, metricKey metrics.NamespaceNameMetric, ...) error
type ApaScalingContext
- func NewApaScalingContext() *ApaScalingContext
- func NewApaScalingContextByPa(pa *autoscalingv1alpha1.PodAutoscaler) (*ApaScalingContext, error)
- func (a *ApaScalingContext) GetDownFluctuationTolerance() float64
- func (a *ApaScalingContext) GetUpFluctuationTolerance() float64
- func (a *ApaScalingContext) UpdateByPaTypes(pa *autoscalingv1alpha1.PodAutoscaler) error
type KpaAutoscaler
- func NewKpaAutoscaler(readyPodsCount int, pa *autoscalingv1alpha1.PodAutoscaler, now time.Time) (*KpaAutoscaler, error)
- func (k *KpaAutoscaler) GetScalingContext() scalingcontext.ScalingContext
- func (k *KpaAutoscaler) InPanicMode() bool
- func (k *KpaAutoscaler) Scale(originalReadyPodsCount int, metricKey metrics.NamespaceNameMetric, ...) ScaleResult
- func (k *KpaAutoscaler) UpdateScaleTargetMetrics(ctx context.Context, metricKey metrics.NamespaceNameMetric, ...) error
- func (k *KpaAutoscaler) UpdateScalingContext(pa autoscalingv1alpha1.PodAutoscaler) error
- func (k *KpaAutoscaler) UpdateSourceMetrics(ctx context.Context, metricKey metrics.NamespaceNameMetric, ...) error
type KpaScalingContext
- func NewKpaScalingContext() *KpaScalingContext
- func NewKpaScalingContextByPa(pa *autoscalingv1alpha1.PodAutoscaler) (*KpaScalingContext, error)
- func (k *KpaScalingContext) UpdateByPaTypes(pa *autoscalingv1alpha1.PodAutoscaler) error
type ScaleResult
type Scaler
- func NewAutoscalerFactory(strategy autoscalingv1alpha1.ScalingStrategyType) (Scaler, error)

Constants ¶

View Source

const (
	// APALabelPrefix is the "apa." prefix prepended to scalingcontext.AutoscalingLabelPrefix.
	// NOTE(review): presumably used as the key prefix for APA-specific labels/annotations on a PodAutoscaler — confirm against usage.
	APALabelPrefix = "apa." + scalingcontext.AutoscalingLabelPrefix
)

View Source

const (
	// KPALabelPrefix is the "kpa." prefix prepended to scalingcontext.AutoscalingLabelPrefix.
	// NOTE(review): presumably used as the key prefix for KPA-specific labels/annotations on a PodAutoscaler — confirm against usage.
	KPALabelPrefix = "kpa." + scalingcontext.AutoscalingLabelPrefix
)

Variables ¶

This section is empty.

Functions ¶

func CalculatePodRequests ¶

func CalculatePodRequests(pods []*v1.Pod, container string, resource v1.ResourceName) (map[string]int64, error)

func GetReadyPodsCount ¶

func GetReadyPodsCount(ctx context.Context, podLister client.Client, namespace string, selector labels.Selector) (int64, error)

func GroupPods ¶

func GroupPods(pods []*v1.Pod, metrics metrics.PodMetricsInfo, resource v1.ResourceName, cpuInitializationPeriod, delayOfInitialReadinessStatus time.Duration) (readyPodCount int, unreadyPods, missingPods, ignoredPods sets.Set[string])

func RemoveMetricsForPods ¶

func RemoveMetricsForPods(metrics metrics.PodMetricsInfo, pods sets.Set[string])

Types ¶

type ApaAutoscaler ¶

type ApaAutoscaler struct {
	Status ScaleResult
	// contains filtered or unexported fields
}

func NewApaAutoscaler ¶

func NewApaAutoscaler(readyPodsCount int, pa *autoscalingv1alpha1.PodAutoscaler) (*ApaAutoscaler, error)

NewApaAutoscaler initializes an ApaAutoscaler.

func (*ApaAutoscaler) GetScalingContext ¶

func (a *ApaAutoscaler) GetScalingContext() common.ScalingContext

func (*ApaAutoscaler) Scale ¶

func (a *ApaAutoscaler) Scale(originalReadyPodsCount int, metricKey metrics.NamespaceNameMetric, now time.Time) ScaleResult

func (*ApaAutoscaler) UpdateScaleTargetMetrics ¶

func (a *ApaAutoscaler) UpdateScaleTargetMetrics(ctx context.Context, metricKey metrics.NamespaceNameMetric, source autoscalingv1alpha1.MetricSource, pods []v1.Pod, now time.Time) error

func (*ApaAutoscaler) UpdateScalingContext ¶

func (a *ApaAutoscaler) UpdateScalingContext(pa autoscalingv1alpha1.PodAutoscaler) error

func (*ApaAutoscaler) UpdateSourceMetrics ¶

func (a *ApaAutoscaler) UpdateSourceMetrics(ctx context.Context, metricKey metrics.NamespaceNameMetric, source autoscalingv1alpha1.MetricSource, now time.Time) error

type ApaScalingContext ¶

type ApaScalingContext struct {
	scalingcontext.BaseScalingContext

	// The two following attributes are specific to APA.
	// UpFluctuationTolerance represents the threshold before scaling up,
	// which means no scaling up will occur unless the currentMetricValue exceeds the TargetValue by more than UpFluctuationTolerance.
	UpFluctuationTolerance float64
	// DownFluctuationTolerance represents the threshold before scaling down,
	// which means no scaling down will occur unless the currentMetricValue is less than the TargetValue by more than DownFluctuationTolerance.
	DownFluctuationTolerance float64
	// Window is the metric window length.
	Window time.Duration
}

ApaScalingContext defines parameters for scaling decisions.

func NewApaScalingContext ¶

func NewApaScalingContext() *ApaScalingContext

NewApaScalingContext references KPA and sets up a default configuration.

func NewApaScalingContextByPa ¶

func NewApaScalingContextByPa(pa *autoscalingv1alpha1.PodAutoscaler) (*ApaScalingContext, error)

NewApaScalingContextByPa initializes ApaScalingContext by passed-in PodAutoscaler description

func (*ApaScalingContext) GetDownFluctuationTolerance ¶

func (a *ApaScalingContext) GetDownFluctuationTolerance() float64

func (*ApaScalingContext) GetUpFluctuationTolerance ¶

func (a *ApaScalingContext) GetUpFluctuationTolerance() float64

func (*ApaScalingContext) UpdateByPaTypes ¶

func (a *ApaScalingContext) UpdateByPaTypes(pa *autoscalingv1alpha1.PodAutoscaler) error

type KpaAutoscaler ¶

type KpaAutoscaler struct {
	Status *ScaleResult
	// contains filtered or unexported fields
}

func NewKpaAutoscaler ¶

func NewKpaAutoscaler(readyPodsCount int, pa *autoscalingv1alpha1.PodAutoscaler, now time.Time) (*KpaAutoscaler, error)

NewKpaAutoscaler initializes a KpaAutoscaler. Referenced from `newAutoscaler` in `knative/pkg/autoscaler/scaling/autoscaler.go`.

func (*KpaAutoscaler) GetScalingContext ¶

func (k *KpaAutoscaler) GetScalingContext() scalingcontext.ScalingContext

func (*KpaAutoscaler) InPanicMode ¶

func (k *KpaAutoscaler) InPanicMode() bool

func (*KpaAutoscaler) Scale ¶

func (k *KpaAutoscaler) Scale(originalReadyPodsCount int, metricKey metrics.NamespaceNameMetric, now time.Time) ScaleResult

Scale implements Scaler interface in KpaAutoscaler. Refer to knative-serving: pkg/autoscaler/scaling/autoscaler.go, Scale function.

func (*KpaAutoscaler) UpdateScaleTargetMetrics ¶

func (k *KpaAutoscaler) UpdateScaleTargetMetrics(ctx context.Context, metricKey metrics.NamespaceNameMetric, source autoscalingv1alpha1.MetricSource, pods []v1.Pod, now time.Time) error

func (*KpaAutoscaler) UpdateScalingContext ¶

func (k *KpaAutoscaler) UpdateScalingContext(pa autoscalingv1alpha1.PodAutoscaler) error

func (*KpaAutoscaler) UpdateSourceMetrics ¶

func (k *KpaAutoscaler) UpdateSourceMetrics(ctx context.Context, metricKey metrics.NamespaceNameMetric, source autoscalingv1alpha1.MetricSource, now time.Time) error

type KpaScalingContext ¶

type KpaScalingContext struct {
	scalingcontext.BaseScalingContext

	// TargetBurstCapacity is the burst capacity that the user wants to maintain
	// without queueing at the pod level.
	// Note that queueing still might happen due to non-ideal load balancing.
	TargetBurstCapacity float64
	// ActivationScale is the minimum, non-zero value that a service should scale to.
	// For example, if ActivationScale = 2, when a service scaled from zero it would
	// scale up two replicas in this case. In essence, this allows one to set both a
	// min-scale value while also preserving the ability to scale to zero.
	// ActivationScale must be >= 1.
	ActivationScale int32

	// TODO: Note that the following attributes are specific to Knative; but we retain them here temporarily.
	// PanicThreshold is the threshold at which panic mode is entered. It represents
	// a factor of the currently observed load over the panic window over the ready
	// pods. I.e. if this is 2, panic mode will be entered if the observed metric
	// is twice as high as the current population can handle.
	PanicThreshold float64
	// StableWindow is needed to determine when to exit panic mode.
	StableWindow time.Duration
	// PanicWindow is needed to determine when to exit panic mode.
	PanicWindow time.Duration
	// ScaleDownDelay is the time that must pass at reduced concurrency before a
	// scale-down decision is applied.
	ScaleDownDelay time.Duration
}

KpaScalingContext defines parameters for scaling decisions.

func NewKpaScalingContext ¶

func NewKpaScalingContext() *KpaScalingContext

NewKpaScalingContext references KPA and sets up a default configuration.

func NewKpaScalingContextByPa ¶

func NewKpaScalingContextByPa(pa *autoscalingv1alpha1.PodAutoscaler) (*KpaScalingContext, error)

NewKpaScalingContextByPa initializes KpaScalingContext by passed-in PodAutoscaler description

func (*KpaScalingContext) UpdateByPaTypes ¶

func (k *KpaScalingContext) UpdateByPaTypes(pa *autoscalingv1alpha1.PodAutoscaler) error

type ScaleResult ¶

type ScaleResult struct {
	// DesiredPodCount is the number of pods Autoscaler suggests for the revision.
	DesiredPodCount int32
	// ExcessBurstCapacity is the computed headroom of the revision, taking into
	// account the target burst capacity.
	ExcessBurstCapacity int32
	// ScaleValid specifies whether this scale result is valid, i.e. whether
	// Autoscaler had all the necessary information to compute a suggestion.
	ScaleValid bool
}

ScaleResult contains the results of a scaling decision.

type Scaler ¶

type Scaler interface {
	// UpdateScaleTargetMetrics updates the current state of metrics used to determine scaling actions.
	// It processes the latest metrics for a given scaling target (identified by metricKey) and stores
	// these values for later use during scaling decisions.
	//
	// Parameters:
	// - ctx: The context used for managing request-scoped values, cancellation, and deadlines.
	// - metricKey: A unique identifier for the scaling target's metrics (e.g., CPU, memory, or QPS) that
	//   is used to correlate metrics with the appropriate scaling logic.
	// - source: The MetricSource object containing the desired scaling configuration and current state.
	// - pods: The pods of the scaling target passed in by the caller.
	//   NOTE(review): presumably the pods whose metrics are read — confirm against the implementations.
	// - now: The current time at which the metrics are being processed. This timestamp helps track
	//   when the last metric update occurred and can be used to calculate time-based scaling actions.
	//
	// Returns:
	// - error: Non-nil if the metrics update fails.
	//
	// This method ensures that the autoscaler has up-to-date metrics before making any scaling decisions.
	UpdateScaleTargetMetrics(ctx context.Context, metricKey metrics.NamespaceNameMetric, source autoscalingv1alpha1.MetricSource, pods []corev1.Pod, now time.Time) error

	// UpdateSourceMetrics updates the current state of metrics used to determine scaling actions.
	// It processes the latest metrics for a metrics source and stores
	// these values for later use during scaling decisions.
	//
	// Parameters:
	// - ctx: The context used for managing request-scoped values, cancellation, and deadlines.
	// - metricKey: A unique identifier for the scaling target's metrics (e.g., CPU, memory, or QPS) that
	//   is used to correlate metrics with the appropriate scaling logic.
	// - source: The MetricSource object containing the desired scaling configuration and current state.
	// - now: The current time at which the metrics are being processed. This timestamp helps track
	//   when the last metric update occurred and can be used to calculate time-based scaling actions.
	//
	// Returns:
	// - error: Non-nil if the metrics update fails.
	//
	// This method ensures that the autoscaler has up-to-date metrics before making any scaling decisions.
	UpdateSourceMetrics(ctx context.Context, metricKey metrics.NamespaceNameMetric, source autoscalingv1alpha1.MetricSource, now time.Time) error

	// Scale calculates the necessary scaling action based on observed metrics
	// and the current time. This is the core logic of the autoscaler.
	//
	// Parameters:
	// - originalReadyPodsCount: The current number of ready pods.
	// - metricKey: A unique key to identify the metric for scaling.
	// - now: The current time, used to decide if scaling actions are needed based on timing rules or delays.
	//
	// Returns:
	// - ScaleResult: Contains the recommended number of pods to scale up or down.
	//
	// For reference: see the implementation in KpaAutoscaler.Scale.
	Scale(originalReadyPodsCount int, metricKey metrics.NamespaceNameMetric, now time.Time) ScaleResult

	// UpdateScalingContext updates the internal scaling context for a given PodAutoscaler (PA) instance.
	// It extracts necessary information from the provided PodAutoscaler resource, such as current
	// metrics, scaling parameters, and other relevant data to refresh the scaling context.
	//
	// Parameters:
	// - pa: The PodAutoscaler resource containing the desired scaling configuration and current state.
	//
	// Returns:
	// - error: If the context update fails due to invalid input or configuration issues, it returns an error.
	//
	// This method ensures that the internal scaling context is always in sync with the latest state
	// and configuration of the target PodAutoscaler, allowing accurate scaling decisions.
	UpdateScalingContext(pa autoscalingv1alpha1.PodAutoscaler) error

	// GetScalingContext retrieves the current scaling context used for making scaling decisions.
	// This method returns the ScalingContext, which contains essential data like
	// target values, current metrics, and scaling tolerances.
	//
	// Returns:
	// - common.ScalingContext: The ScalingContext containing the relevant
	//   data for autoscaling logic.
	//
	// This method provides access to the scaling context for external components or logic that
	// need to read or adjust the current scaling parameters.
	GetScalingContext() common.ScalingContext
}

Scaler defines the interface for autoscaling operations. Any autoscaler implementation, such as KpaAutoscaler (Knative-style Pod Autoscaler) or ApaAutoscaler, must implement this interface to respond to scaling events.

func NewAutoscalerFactory ¶

func NewAutoscalerFactory(strategy autoscalingv1alpha1.ScalingStrategyType) (Scaler, error)

NewAutoscalerFactory creates an Autoscaler based on the given ScalingStrategy

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL