Documentation
¶
Overview ¶
Package v1alpha1 contains API Schema definitions for the llmd v1alpha1 API group. +kubebuilder:object:generate=true +groupName=llmd.ai
Index ¶
- Constants
- Variables
- func GetCondition(va *VariantAutoscaling, conditionType string) *metav1.Condition
- func IsConditionFalse(va *VariantAutoscaling, conditionType string) bool
- func IsConditionTrue(va *VariantAutoscaling, conditionType string) bool
- func SetCondition(va *VariantAutoscaling, conditionType string, status metav1.ConditionStatus, ...)
- type AcceleratorProfile
- type ActuationStatus
- type Allocation
- type ConfigMapKeyRef
- type LoadProfile
- type ModelProfile
- type OptimizedAlloc
- type PerfParms
- type VariantAutoscaling
- type VariantAutoscalingList
- type VariantAutoscalingSpec
- type VariantAutoscalingStatus
Constants ¶
// Condition types for VariantAutoscaling.
const (
	// TypeMetricsAvailable indicates whether vLLM metrics are available from Prometheus.
	TypeMetricsAvailable = "MetricsAvailable"
	// TypeOptimizationReady indicates whether the optimization engine can run successfully.
	TypeOptimizationReady = "OptimizationReady"
)
Condition Types for VariantAutoscaling
// Condition reasons for the MetricsAvailable condition.
const (
	// ReasonMetricsFound indicates vLLM metrics were successfully retrieved.
	ReasonMetricsFound = "MetricsFound"
	// ReasonMetricsMissing indicates vLLM metrics are not available (likely ServiceMonitor issue).
	ReasonMetricsMissing = "MetricsMissing"
	// ReasonMetricsStale indicates metrics exist but are outdated.
	ReasonMetricsStale = "MetricsStale"
	// ReasonPrometheusError indicates error querying Prometheus.
	ReasonPrometheusError = "PrometheusError"
)
Condition Reasons for MetricsAvailable
// Condition reasons for the OptimizationReady condition.
const (
	// ReasonOptimizationSucceeded indicates optimization completed successfully.
	ReasonOptimizationSucceeded = "OptimizationSucceeded"
	// ReasonOptimizationFailed indicates optimization failed.
	ReasonOptimizationFailed = "OptimizationFailed"
	// ReasonMetricsUnavailable is the OptimizationReady reason tied to
	// unavailable metrics.
	// NOTE(review): presumably set when optimization cannot run because
	// metrics are missing — confirm against the controller.
	ReasonMetricsUnavailable = "MetricsUnavailable"
)
Condition Reasons for OptimizationReady
Variables ¶
var ( // GroupVersion is group version used to register these objects. GroupVersion = schema.GroupVersion{Group: "llmd.ai", Version: "v1alpha1"} // SchemeBuilder is used to add go types to the GroupVersionKind scheme. SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion} // AddToScheme adds the types in this group-version to the given scheme. AddToScheme = SchemeBuilder.AddToScheme )
Functions ¶
func GetCondition ¶ added in v0.0.2
func GetCondition(va *VariantAutoscaling, conditionType string) *metav1.Condition
GetCondition returns the condition with the specified type
func IsConditionFalse ¶ added in v0.0.2
func IsConditionFalse(va *VariantAutoscaling, conditionType string) bool
IsConditionFalse returns true if the condition with the specified type has status False
func IsConditionTrue ¶ added in v0.0.2
func IsConditionTrue(va *VariantAutoscaling, conditionType string) bool
IsConditionTrue returns true if the condition with the specified type has status True
func SetCondition ¶ added in v0.0.2
func SetCondition(va *VariantAutoscaling, conditionType string, status metav1.ConditionStatus, reason, message string)
SetCondition sets the specified condition on the VariantAutoscaling status
Types ¶
type AcceleratorProfile ¶
// AcceleratorProfile describes one accelerator option: its type, unit count,
// performance-model parameters, and maximum batch size.
type AcceleratorProfile struct {
// Acc specifies the type or name of the accelerator (e.g., GPU type).
// +kubebuilder:validation:MinLength=1
Acc string `json:"acc"`
// AccCount specifies the number of accelerator units to be used.
// +kubebuilder:validation:Minimum=1
AccCount int `json:"accCount"`
// PerfParms specifies the prefill and decode parameters for the TTFT and ITL models.
PerfParms PerfParms `json:"perfParms"`
// MaxBatchSize is the maximum batch size supported by the accelerator.
// +kubebuilder:validation:Minimum=1
MaxBatchSize int `json:"maxBatchSize"`
}
AcceleratorProfile defines the configuration for an accelerator used in autoscaling. It specifies the accelerator type and count, the performance-model parameters (PerfParms), and the maximum batch size.
func (*AcceleratorProfile) DeepCopy ¶
func (in *AcceleratorProfile) DeepCopy() *AcceleratorProfile
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AcceleratorProfile.
func (*AcceleratorProfile) DeepCopyInto ¶
func (in *AcceleratorProfile) DeepCopyInto(out *AcceleratorProfile)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type ActuationStatus ¶
// ActuationStatus reports the outcome of the actuation step.
type ActuationStatus struct {
// Applied indicates whether the actuation was successfully applied.
// The JSON tag has no omitempty, so a false value is serialized explicitly.
Applied bool `json:"applied"`
}
ActuationStatus provides details about the actuation process and its current status.
func (*ActuationStatus) DeepCopy ¶
func (in *ActuationStatus) DeepCopy() *ActuationStatus
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ActuationStatus.
func (*ActuationStatus) DeepCopyInto ¶
func (in *ActuationStatus) DeepCopyInto(out *ActuationStatus)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type Allocation ¶
// Allocation describes the resources currently allocated to a model variant,
// together with its cost, observed latencies, and load.
//
// VariantCost, ITLAverage and TTFTAverage are carried as strings and validated
// by pattern as unsigned decimals: digits with an optional fractional part,
// no sign and no exponent.
type Allocation struct {
// Accelerator is the type of accelerator currently allocated.
// +kubebuilder:validation:MinLength=1
Accelerator string `json:"accelerator"`
// NumReplicas is the number of replicas currently allocated.
// +kubebuilder:validation:Minimum=0
NumReplicas int `json:"numReplicas"`
// MaxBatch is the maximum batch size currently allocated.
// +kubebuilder:validation:Minimum=0
MaxBatch int `json:"maxBatch"`
// VariantCost is the cost associated with the current variant allocation.
// +kubebuilder:validation:Pattern=`^\d+(\.\d+)?$`
VariantCost string `json:"variantCost"`
// ITLAverage is the average inter token latency for the current allocation.
// NOTE(review): the time unit is not stated here — confirm with the producer.
// +kubebuilder:validation:Pattern=`^\d+(\.\d+)?$`
ITLAverage string `json:"itlAverage"`
// TTFTAverage is the average time to first token for the current allocation.
// NOTE(review): the time unit is not stated here — confirm with the producer.
// +kubebuilder:validation:Pattern=`^\d+(\.\d+)?$`
TTFTAverage string `json:"ttftAverage"`
// Load describes the workload characteristics for the current allocation.
Load LoadProfile `json:"load"`
}
Allocation describes the current resource allocation for a model variant.
func (*Allocation) DeepCopy ¶
func (in *Allocation) DeepCopy() *Allocation
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Allocation.
func (*Allocation) DeepCopyInto ¶
func (in *Allocation) DeepCopyInto(out *Allocation)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type ConfigMapKeyRef ¶
// ConfigMapKeyRef identifies a single key inside a ConfigMap by ConfigMap name
// and key. It carries no namespace field.
// NOTE(review): how the namespace is resolved is not visible here — confirm
// against the controller.
type ConfigMapKeyRef struct {
// Name is the name of the ConfigMap.
// +kubebuilder:validation:MinLength=1
Name string `json:"name"`
// Key is the key within the ConfigMap.
// +kubebuilder:validation:MinLength=1
Key string `json:"key"`
}
ConfigMapKeyRef references a specific key within a ConfigMap.
func (*ConfigMapKeyRef) DeepCopy ¶
func (in *ConfigMapKeyRef) DeepCopy() *ConfigMapKeyRef
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ConfigMapKeyRef.
func (*ConfigMapKeyRef) DeepCopyInto ¶
func (in *ConfigMapKeyRef) DeepCopyInto(out *ConfigMapKeyRef)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type LoadProfile ¶
// LoadProfile captures workload characteristics: the request arrival rate and
// the average input and output token counts per request. All three fields are
// strings and carry no validation markers in this declaration.
type LoadProfile struct {
// ArrivalRate is the rate of incoming requests in inference server.
// NOTE(review): the rate unit (e.g. per second vs. per minute) is not stated
// here — confirm with the producer.
ArrivalRate string `json:"arrivalRate"`
// AvgInputTokens is the average number of input(prefill) tokens per request in inference server.
AvgInputTokens string `json:"avgInputTokens"`
// AvgOutputTokens is the average number of output(decode) tokens per request in inference server.
AvgOutputTokens string `json:"avgOutputTokens"`
}
LoadProfile represents the configuration for workload characteristics, including the rate of incoming requests (ArrivalRate) and the average number of input and output tokens per request (AvgInputTokens, AvgOutputTokens). All fields are specified as strings to allow flexible input formats.
func (*LoadProfile) DeepCopy ¶
func (in *LoadProfile) DeepCopy() *LoadProfile
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LoadProfile.
func (*LoadProfile) DeepCopyInto ¶
func (in *LoadProfile) DeepCopyInto(out *LoadProfile)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type ModelProfile ¶
// ModelProfile lists the accelerator profiles available to a model variant.
type ModelProfile struct {
// Accelerators is a list of accelerator profiles for the model variant.
// At least one entry is required by the MinItems marker below.
// +kubebuilder:validation:MinItems=1
Accelerators []AcceleratorProfile `json:"accelerators"`
}
ModelProfile provides resource and performance characteristics for the model variant.
func (*ModelProfile) DeepCopy ¶
func (in *ModelProfile) DeepCopy() *ModelProfile
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelProfile.
func (*ModelProfile) DeepCopyInto ¶
func (in *ModelProfile) DeepCopyInto(out *ModelProfile)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type OptimizedAlloc ¶
// OptimizedAlloc is the target allocation produced by an optimization run:
// which accelerator to use and how many replicas, plus when the run happened.
type OptimizedAlloc struct {
// LastRunTime is the timestamp of the last optimization run.
// NOTE(review): metav1.Time is declared outside this view; if it is a struct
// value, encoding/json's omitempty will not omit it when zero — confirm the
// serialized form is as intended.
LastRunTime metav1.Time `json:"lastRunTime,omitempty"`
// Accelerator is the type of accelerator for the optimized allocation.
// NOTE(review): the minimum length here is 2, whereas Allocation.Accelerator
// and AcceleratorProfile.Acc use 1 — confirm the difference is intentional.
// +kubebuilder:validation:MinLength=2
Accelerator string `json:"accelerator"`
// NumReplicas is the number of replicas for the optimized allocation.
// +kubebuilder:validation:Minimum=0
NumReplicas int `json:"numReplicas"`
}
OptimizedAlloc describes the target optimized allocation for a model variant.
func (*OptimizedAlloc) DeepCopy ¶
func (in *OptimizedAlloc) DeepCopy() *OptimizedAlloc
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new OptimizedAlloc.
func (*OptimizedAlloc) DeepCopyInto ¶
func (in *OptimizedAlloc) DeepCopyInto(out *OptimizedAlloc)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type PerfParms ¶
// PerfParms holds the performance-model parameters for the two inference
// phases: DecodeParms for the decode phase (ITL) and PrefillParms for the
// prefill phase (TTFT). Keys and values are both strings.
//
// The MinProperties=1 markers only require a single entry per map; the presence
// of both expected keys is not enforced by this declaration.
type PerfParms struct {
// DecodeParms contains parameters for the decode phase (ITL calculation)
// Expected keys: "alpha", "beta" for equation: itl = alpha + beta * maxBatchSize
// +kubebuilder:validation:MinProperties=1
DecodeParms map[string]string `json:"decodeParms"`
// PrefillParms contains parameters for the prefill phase (TTFT calculation)
// Expected keys: "gamma", "delta" for equation: ttft = gamma + delta * tokens * maxBatchSize
// +kubebuilder:validation:MinProperties=1
PrefillParms map[string]string `json:"prefillParms"`
}
func (*PerfParms) DeepCopy ¶
func (in *PerfParms) DeepCopy() *PerfParms
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PerfParms.
func (*PerfParms) DeepCopyInto ¶
func (in *PerfParms) DeepCopyInto(out *PerfParms)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type VariantAutoscaling ¶
// VariantAutoscaling is the top-level API object for autoscaling a model
// variant. It embeds the standard type and object metadata and pairs a desired
// Spec with an observed Status.
type VariantAutoscaling struct {
// TypeMeta is embedded and inlined into the object's JSON.
metav1.TypeMeta `json:",inline"`
// ObjectMeta is embedded and serialized under "metadata".
metav1.ObjectMeta `json:"metadata,omitempty"`
// Spec defines the desired state for autoscaling the model variant.
Spec VariantAutoscalingSpec `json:"spec,omitempty"`
// Status represents the current status of autoscaling for the model variant.
Status VariantAutoscalingStatus `json:"status,omitempty"`
}
VariantAutoscaling is the Schema for the variantautoscalings API. It represents the autoscaling configuration and status for a model variant.
func (*VariantAutoscaling) DeepCopy ¶
func (in *VariantAutoscaling) DeepCopy() *VariantAutoscaling
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VariantAutoscaling.
func (*VariantAutoscaling) DeepCopyInto ¶
func (in *VariantAutoscaling) DeepCopyInto(out *VariantAutoscaling)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*VariantAutoscaling) DeepCopyObject ¶
func (in *VariantAutoscaling) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type VariantAutoscalingList ¶
// VariantAutoscalingList is the list form of VariantAutoscaling: the standard
// type and list metadata plus the contained items.
type VariantAutoscalingList struct {
// TypeMeta is embedded and inlined into the object's JSON.
metav1.TypeMeta `json:",inline"`
// ListMeta is embedded and serialized under "metadata".
metav1.ListMeta `json:"metadata,omitempty"`
// Items is the list of VariantAutoscaling resources.
Items []VariantAutoscaling `json:"items"`
}
VariantAutoscalingList contains a list of VariantAutoscaling resources. +kubebuilder:object:root=true
func (*VariantAutoscalingList) DeepCopy ¶
func (in *VariantAutoscalingList) DeepCopy() *VariantAutoscalingList
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VariantAutoscalingList.
func (*VariantAutoscalingList) DeepCopyInto ¶
func (in *VariantAutoscalingList) DeepCopyInto(out *VariantAutoscalingList)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*VariantAutoscalingList) DeepCopyObject ¶
func (in *VariantAutoscalingList) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type VariantAutoscalingSpec ¶
// VariantAutoscalingSpec is the desired state of a VariantAutoscaling. All
// three fields are marked Required.
type VariantAutoscalingSpec struct {
// ModelID specifies the unique identifier of the model to be autoscaled.
// +kubebuilder:validation:MinLength=1
// +kubebuilder:validation:Required
ModelID string `json:"modelID"`
// SLOClassRef references the ConfigMap key containing Service Level Objective (SLO) configuration.
// +kubebuilder:validation:Required
SLOClassRef ConfigMapKeyRef `json:"sloClassRef"`
// ModelProfile provides resource and performance characteristics for the model variant.
// +kubebuilder:validation:Required
ModelProfile ModelProfile `json:"modelProfile"`
}
VariantAutoscalingSpec defines the desired state for autoscaling a model variant.
func (*VariantAutoscalingSpec) DeepCopy ¶
func (in *VariantAutoscalingSpec) DeepCopy() *VariantAutoscalingSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VariantAutoscalingSpec.
func (*VariantAutoscalingSpec) DeepCopyInto ¶
func (in *VariantAutoscalingSpec) DeepCopyInto(out *VariantAutoscalingSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type VariantAutoscalingStatus ¶
// VariantAutoscalingStatus is the observed state of a VariantAutoscaling: the
// current allocation, the desired optimized allocation, the actuation result,
// and a list of conditions.
//
// NOTE(review): CurrentAlloc, DesiredOptimizedAlloc and Actuation are
// non-pointer struct values, so encoding/json's omitempty does not omit them
// when zero. A zero value is still serialized, and the nested field validation
// (e.g. MinLength on accelerator) would presumably still apply — confirm that
// status writers always populate these fields.
type VariantAutoscalingStatus struct {
// CurrentAlloc specifies the current resource allocation for the variant.
CurrentAlloc Allocation `json:"currentAlloc,omitempty"`
// DesiredOptimizedAlloc indicates the target optimized allocation based on autoscaling logic.
DesiredOptimizedAlloc OptimizedAlloc `json:"desiredOptimizedAlloc,omitempty"`
// Actuation provides details about the actuation process and its current status.
Actuation ActuationStatus `json:"actuation,omitempty"`
// Conditions represent the latest available observations of the VariantAutoscaling's state.
// The list is declared as a map-type list keyed by condition type.
// +optional
// +patchMergeKey=type
// +patchStrategy=merge
// +listType=map
// +listMapKey=type
Conditions []metav1.Condition `json:"conditions,omitempty" patchStrategy:"merge" patchMergeKey:"type"`
}
VariantAutoscalingStatus represents the current status of autoscaling for a variant, including the current allocation, desired optimized allocation, and actuation status.
func (*VariantAutoscalingStatus) DeepCopy ¶
func (in *VariantAutoscalingStatus) DeepCopy() *VariantAutoscalingStatus
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VariantAutoscalingStatus.
func (*VariantAutoscalingStatus) DeepCopyInto ¶
func (in *VariantAutoscalingStatus) DeepCopyInto(out *VariantAutoscalingStatus)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.