Documentation
¶
Overview ¶
Package v1alpha1 contains API Schema definitions for the inference v1alpha1 API group.
+kubebuilder:object:generate=true
+groupName=inference.llmaz.io
Index ¶
Constants ¶
// String values describing the state of a Playground. Both are untyped string
// constants, matching the ServiceAvailable/ServiceProgressing constants.
const (
	// PlaygroundProgressing means the Playground is progressing now, such as waiting for the
	// inference service creation, rolling update or scaling up and down.
	PlaygroundProgressing = "Progressing"
	// PlaygroundAvailable indicates the corresponding inference service is available now.
	PlaygroundAvailable = "Available"
)
// String values describing the state of an inference Service.
const (
	// ServiceAvailable means the inferenceService is available and all the
	// workloads are running as expected.
	ServiceAvailable = "Available"
	// ServiceProgressing means the inferenceService is progressing now, such as
	// in creation, rolling update or scaling up and down.
	ServiceProgressing = "Progressing"
)
Variables ¶
var ( // GroupVersion is group version used to register these objects GroupVersion = schema.GroupVersion{Group: "inference.llmaz.io", Version: "v1alpha1"} // SchemeGroupVersion is alias to GroupVersion for client-go libraries. // It is required by pkg/client/informers/externalversions/... SchemeGroupVersion = GroupVersion // SchemeBuilder is used to add go types to the GroupVersionKind scheme SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion} // AddToScheme adds the types in this group-version to the given scheme. AddToScheme = SchemeBuilder.AddToScheme )
Functions ¶
func Resource ¶
func Resource(resource string) schema.GroupResource
Resource is required by pkg/client/listers/...
Types ¶
type BackendConfig ¶
// BackendConfig describes the inference backend configuration: which backend
// to run, its version, arguments, environment variables and non-accelerator
// resources.
type BackendConfig struct {
// Name represents the inference backend under the hood, e.g. vLLM.
// Allowed values are vllm, sglang and llamacpp; defaults to vllm.
// +kubebuilder:validation:Enum={vllm,sglang,llamacpp}
// +kubebuilder:default=vllm
// +optional
Name *BackendName `json:"name,omitempty"`
// Version represents the backend version to use when a version other than
// the default one is wanted.
// +optional
Version *string `json:"version,omitempty"`
// Args represents the arguments passed to the backend.
// +optional
Args []string `json:"args,omitempty"`
// Envs represents the environment variables set on the container.
// +optional
Envs []corev1.EnvVar `json:"envs,omitempty"`
// Resources represents the resource requirements for the backend, such as cpu/mem.
// Accelerators like GPUs should not be defined here but in the Model flavors;
// otherwise the same accelerator requirements defined in the flavors will be
// overridden and the workload will lose its fungibility capability.
Resources *ResourceRequirements `json:"resources,omitempty"`
}
func (*BackendConfig) DeepCopy ¶
func (in *BackendConfig) DeepCopy() *BackendConfig
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new BackendConfig.
func (*BackendConfig) DeepCopyInto ¶
func (in *BackendConfig) DeepCopyInto(out *BackendConfig)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type BackendName ¶
// BackendName identifies an inference backend by name.
type BackendName string

// Known backend names. The string values match the enum accepted by
// BackendConfig.Name.
const (
	// LLAMACPP is the "llamacpp" backend name.
	LLAMACPP BackendName = "llamacpp"
	// SGLANG is the "sglang" backend name.
	SGLANG BackendName = "sglang"
	// VLLM is the "vllm" backend name.
	VLLM BackendName = "vllm"
	// DefaultBackend is the default backend name; it equals VLLM.
	DefaultBackend BackendName = VLLM
)
type ElasticConfig ¶
// ElasticConfig bounds the number of inference workload replicas for elastic usage.
type ElasticConfig struct {
// MinReplicas indicates the minimum number of inference workloads based on the traffic.
// Defaults to 1 (see the default marker below), meaning the instances can be
// scaled down to 1.
// Setting minReplicas to 0 requires the serverless component to be installed first.
// +kubebuilder:default=1
// +optional
MinReplicas *int32 `json:"minReplicas,omitempty"`
// MaxReplicas indicates the maximum number of inference workloads based on the traffic.
// A nil value (the default) means there is no limit on the number of instances.
// +optional
MaxReplicas *int32 `json:"maxReplicas,omitempty"`
}
func (*ElasticConfig) DeepCopy ¶
func (in *ElasticConfig) DeepCopy() *ElasticConfig
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ElasticConfig.
func (*ElasticConfig) DeepCopyInto ¶
func (in *ElasticConfig) DeepCopyInto(out *ElasticConfig)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type Playground ¶
// Playground is the Schema for the playgrounds API.
type Playground struct {
// TypeMeta is embedded and inlined into the serialized object.
metav1.TypeMeta `json:",inline"`
// ObjectMeta is embedded and serialized under the "metadata" key.
metav1.ObjectMeta `json:"metadata,omitempty"`
// Spec is the desired state of the Playground.
Spec PlaygroundSpec `json:"spec,omitempty"`
// Status is the observed state of the Playground.
Status PlaygroundStatus `json:"status,omitempty"`
}
Playground is the Schema for the playgrounds API
func (*Playground) DeepCopy ¶
func (in *Playground) DeepCopy() *Playground
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Playground.
func (*Playground) DeepCopyInto ¶
func (in *Playground) DeepCopyInto(out *Playground)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*Playground) DeepCopyObject ¶
func (in *Playground) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type PlaygroundList ¶
// PlaygroundList contains a list of Playground objects.
type PlaygroundList struct {
// TypeMeta is embedded and inlined into the serialized object.
metav1.TypeMeta `json:",inline"`
// ListMeta is embedded and serialized under the "metadata" key.
metav1.ListMeta `json:"metadata,omitempty"`
// Items holds the Playground objects; it is always serialized (no omitempty).
Items []Playground `json:"items"`
}
PlaygroundList contains a list of Playground
func (*PlaygroundList) DeepCopy ¶
func (in *PlaygroundList) DeepCopy() *PlaygroundList
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PlaygroundList.
func (*PlaygroundList) DeepCopyInto ¶
func (in *PlaygroundList) DeepCopyInto(out *PlaygroundList)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*PlaygroundList) DeepCopyObject ¶
func (in *PlaygroundList) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type PlaygroundSpec ¶
// PlaygroundSpec defines the desired state of Playground.
type PlaygroundSpec struct {
// Replicas represents the replica number of inference workloads.
// Defaults to 1.
// +kubebuilder:default=1
// +optional
Replicas *int32 `json:"replicas,omitempty"`
// ModelClaim represents a single modelClaim. It is a simpler configuration
// than multiModelsClaims and only works for one model and one claim.
// ModelClaim and multiModelsClaims are mutually exclusive.
// Note: properties (nodeSelectors, resources, etc.) of the model flavors
// will be applied to the workload if they do not already exist.
// +optional
ModelClaim *coreapi.ModelClaim `json:"modelClaim,omitempty"`
// MultiModelsClaims represents multiple modelClaims, which is useful when different
// sub-workloads have different accelerator requirements, as in the state-of-the-art
// technique called splitwise; the workload template is shared by both.
// ModelClaim and multiModelsClaims are mutually exclusive.
// +optional
MultiModelsClaims []coreapi.MultiModelsClaim `json:"multiModelsClaims,omitempty"`
// BackendConfig represents the inference backend configuration
// under the hood, e.g. vLLM, which is the default backend.
// +optional
BackendConfig *BackendConfig `json:"backendConfig,omitempty"`
}
PlaygroundSpec defines the desired state of Playground
func (*PlaygroundSpec) DeepCopy ¶
func (in *PlaygroundSpec) DeepCopy() *PlaygroundSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PlaygroundSpec.
func (*PlaygroundSpec) DeepCopyInto ¶
func (in *PlaygroundSpec) DeepCopyInto(out *PlaygroundSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type PlaygroundStatus ¶
// PlaygroundStatus defines the observed state of Playground.
type PlaygroundStatus struct {
// Conditions represents the Inference condition.
// The list is omitted from the serialized object when empty.
Conditions []metav1.Condition `json:"conditions,omitempty"`
}
PlaygroundStatus defines the observed state of Playground
func (*PlaygroundStatus) DeepCopy ¶
func (in *PlaygroundStatus) DeepCopy() *PlaygroundStatus
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PlaygroundStatus.
func (*PlaygroundStatus) DeepCopyInto ¶
func (in *PlaygroundStatus) DeepCopyInto(out *PlaygroundStatus)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type ResourceRequirements ¶
// ResourceRequirements describes compute resource limits and requests.
// It is referenced by BackendConfig.Resources.
type ResourceRequirements struct {
// Limits describes the maximum amount of compute resources allowed.
// More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
// +optional
Limits corev1.ResourceList `json:"limits,omitempty"`
// Requests describes the minimum amount of compute resources required.
// If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
// otherwise to an implementation-defined value. Requests cannot exceed Limits.
// More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
// +optional
Requests corev1.ResourceList `json:"requests,omitempty"`
}
TODO: DRA (Dynamic Resource Allocation) is not supported yet; support can be added once needed.
func (*ResourceRequirements) DeepCopy ¶
func (in *ResourceRequirements) DeepCopy() *ResourceRequirements
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ResourceRequirements.
func (*ResourceRequirements) DeepCopyInto ¶
func (in *ResourceRequirements) DeepCopyInto(out *ResourceRequirements)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type Service ¶
// Service is the Schema for the services API.
type Service struct {
// TypeMeta is embedded and inlined into the serialized object.
metav1.TypeMeta `json:",inline"`
// ObjectMeta is embedded and serialized under the "metadata" key.
metav1.ObjectMeta `json:"metadata,omitempty"`
// Spec is the desired state of the Service.
Spec ServiceSpec `json:"spec,omitempty"`
// Status is the observed state of the Service.
Status ServiceStatus `json:"status,omitempty"`
}
Service is the Schema for the services API
func (*Service) DeepCopy ¶
func (in *Service) DeepCopy() *Service
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Service.
func (*Service) DeepCopyInto ¶
func (in *Service) DeepCopyInto(out *Service)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*Service) DeepCopyObject ¶
func (in *Service) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type ServiceList ¶
// ServiceList contains a list of Service objects.
type ServiceList struct {
// TypeMeta is embedded and inlined into the serialized object.
metav1.TypeMeta `json:",inline"`
// ListMeta is embedded and serialized under the "metadata" key.
metav1.ListMeta `json:"metadata,omitempty"`
// Items holds the Service objects; it is always serialized (no omitempty).
Items []Service `json:"items"`
}
ServiceList contains a list of Service
func (*ServiceList) DeepCopy ¶
func (in *ServiceList) DeepCopy() *ServiceList
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceList.
func (*ServiceList) DeepCopyInto ¶
func (in *ServiceList) DeepCopyInto(out *ServiceList)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*ServiceList) DeepCopyObject ¶
func (in *ServiceList) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type ServiceSpec ¶
// ServiceSpec defines the desired state of Service.
type ServiceSpec struct {
// MultiModelsClaims represents multiple modelClaims, which is useful when different
// sub-workloads have different accelerator requirements, as in the state-of-the-art
// technique called splitwise; the workload template is shared by both.
// Most of the time, one modelClaim is enough.
// Note: properties (nodeSelectors, resources, etc.) of the model flavors
// will be applied to the workload if they do not already exist.
// NOTE(review): MinItems=1 is declared below while the JSON tag carries
// omitempty — confirm whether this field is meant to be required.
// +kubebuilder:validation:MinItems=1
MultiModelsClaims []coreapi.MultiModelsClaim `json:"multiModelsClaims,omitempty"`
// WorkloadTemplate defines the underlying workload layout and configuration.
// Note: the LWS (LeaderWorkerSet) spec might be twisted with various LWS instances
// to support accelerator fungibility or other cutting-edge research.
// LWS supports both single-host and multi-host scenarios; for single-host
// cases, only replicas, rolloutStrategy and workerTemplate need attention.
WorkloadTemplate lws.LeaderWorkerSetSpec `json:"workloadTemplate"`
// ElasticConfig defines the configuration for elastic usage,
// e.g. the max/min replicas. Default to 0 ~ Inf+.
// This requires the HPA to be installed first, otherwise it will not work.
// NOTE(review): "0 ~ Inf+" disagrees with the default of 1 declared on
// ElasticConfig.MinReplicas — confirm which lower bound is intended.
// +optional
ElasticConfig *ElasticConfig `json:"elasticConfig,omitempty"`
}
ServiceSpec defines the desired state of Service. The Service controller maintains workloads of multiple flavors, each with different accelerators, for cost or performance considerations.
func (*ServiceSpec) DeepCopy ¶
func (in *ServiceSpec) DeepCopy() *ServiceSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceSpec.
func (*ServiceSpec) DeepCopyInto ¶
func (in *ServiceSpec) DeepCopyInto(out *ServiceSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type ServiceStatus ¶
// ServiceStatus defines the observed state of Service.
type ServiceStatus struct {
// Conditions represents the Inference condition.
// The list is omitted from the serialized object when empty.
Conditions []metav1.Condition `json:"conditions,omitempty"`
}
ServiceStatus defines the observed state of Service
func (*ServiceStatus) DeepCopy ¶
func (in *ServiceStatus) DeepCopy() *ServiceStatus
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceStatus.
func (*ServiceStatus) DeepCopyInto ¶
func (in *ServiceStatus) DeepCopyInto(out *ServiceStatus)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.