Documentation
¶
Overview ¶
Package v1alpha1 contains API Schema definitions for the model v1alpha1 API group. +kubebuilder:object:generate=true +groupName=model.otterscale.io
Index ¶
- Variables
- type AcceleratorSpec
- type AcceleratorType
- type EPPFailureMode
- type EndpointPickerSpec
- type EngineSpec
- type GatewayRef
- type HTTPRouteSpec
- type HuggingFaceSource
- type InferencePoolSpec
- type ModelArtifact
- type ModelArtifactList
- type ModelArtifactPhase
- type ModelArtifactSpec
- type ModelArtifactStatus
- type ModelService
- type ModelServiceList
- type ModelServicePhase
- type ModelServiceSpec
- type ModelServiceStatus
- type ModelSource
- type ModelSpec
- type MonitoringSpec
- type OCITarget
- type PackFormat
- type ParallelismSpec
- type PodMonitorSpec
- type ResourceReference
- type RoleSpec
- type RoutingProxySpec
- type SecretKeySelector
- type SecretReference
- type StorageSpec
Constants ¶
This section is empty.
Variables ¶
var ( // GroupVersion is group version used to register these objects. GroupVersion = schema.GroupVersion{Group: "model.otterscale.io", Version: "v1alpha1"} // SchemeBuilder is used to add go types to the GroupVersionKind scheme. SchemeBuilder = runtime.NewSchemeBuilder(addKnownTypes) // AddToScheme adds the types in this group-version to the given scheme. AddToScheme = SchemeBuilder.AddToScheme )
Functions ¶
This section is empty.
Types ¶
type AcceleratorSpec ¶ added in v0.8.22
// AcceleratorSpec configures the accelerator hardware for serving pods.
type AcceleratorSpec struct {
// Type of accelerator hardware.
// The operator uses this to determine the appropriate resource name
// (e.g. nvidia.com/gpu, amd.com/gpu) and any accelerator-specific
// environment variables.
// +kubebuilder:validation:Enum=nvidia;amd;intel-gaudi;google;cpu
// +required
Type AcceleratorType `json:"type"`
}
AcceleratorSpec configures the accelerator hardware for serving pods.
func (*AcceleratorSpec) DeepCopy ¶ added in v0.8.22
func (in *AcceleratorSpec) DeepCopy() *AcceleratorSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AcceleratorSpec.
func (*AcceleratorSpec) DeepCopyInto ¶ added in v0.8.22
func (in *AcceleratorSpec) DeepCopyInto(out *AcceleratorSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type AcceleratorType ¶ added in v0.8.22
// AcceleratorType specifies the accelerator hardware type.
// +enum
type AcceleratorType string
AcceleratorType specifies the accelerator hardware type. +enum
const ( AcceleratorNvidia AcceleratorType = "nvidia" AcceleratorAMD AcceleratorType = "amd" AcceleratorIntelGaudi AcceleratorType = "intel-gaudi" AcceleratorGoogle AcceleratorType = "google" AcceleratorCPU AcceleratorType = "cpu" )
type EPPFailureMode ¶ added in v0.8.22
// EPPFailureMode determines behavior when the Endpoint Picker is unavailable.
// +enum
type EPPFailureMode string
EPPFailureMode determines behavior when the Endpoint Picker is unavailable. +enum
const ( EPPFailureModeFail EPPFailureMode = "FailClose" EPPFailureModeOpen EPPFailureMode = "FailOpen" )
type EndpointPickerSpec ¶ added in v0.8.22
// EndpointPickerSpec configures the Endpoint Picker (EPP) extension deployment.
// The EPP service name is automatically derived from the ModelService name.
type EndpointPickerSpec struct {
// Image is the EPP container image.
// +kubebuilder:default="ghcr.io/llm-d/llm-d-inference-scheduler:v0.6.0"
// +optional
Image string `json:"image,omitempty"`
// Replicas is the number of EPP pod replicas.
// +kubebuilder:default=1
// +kubebuilder:validation:Minimum=0
// +optional
Replicas *int32 `json:"replicas,omitempty"`
// Resources for the EPP container (CPU, memory).
// CPU limits are intentionally left unset by default to allow bursting
// during scheduling spikes.
// +optional
Resources corev1.ResourceRequirements `json:"resources,omitempty"`
// Port of the EPP service (extProc gRPC port).
// +kubebuilder:default=9002
// +kubebuilder:validation:Minimum=1
// +kubebuilder:validation:Maximum=65535
// +optional
Port int32 `json:"port,omitempty"`
// FailureMode determines behavior when the EPP is unavailable.
// +kubebuilder:validation:Enum=FailOpen;FailClose
// +kubebuilder:default="FailOpen"
// +optional
FailureMode EPPFailureMode `json:"failureMode,omitempty"`
}
EndpointPickerSpec configures the Endpoint Picker (EPP) extension deployment. The EPP service name is automatically derived from the ModelService name.
func (*EndpointPickerSpec) DeepCopy ¶ added in v0.8.22
func (in *EndpointPickerSpec) DeepCopy() *EndpointPickerSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EndpointPickerSpec.
func (*EndpointPickerSpec) DeepCopyInto ¶ added in v0.8.22
func (in *EndpointPickerSpec) DeepCopyInto(out *EndpointPickerSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type EngineSpec ¶ added in v0.8.22
// EngineSpec configures the vLLM inference engine.
type EngineSpec struct {
// Image is the vLLM container image.
// +kubebuilder:default="ghcr.io/llm-d/llm-d-cuda:v0.5.1"
// +optional
Image string `json:"image,omitempty"`
// ImagePullPolicy for the engine container.
// +optional
ImagePullPolicy corev1.PullPolicy `json:"imagePullPolicy,omitempty"`
// Args are additional vLLM command-line arguments.
// The operator automatically sets --model, --port, --served-model-name,
// --tensor-parallel-size, and --data-parallel-size based on the spec.
// User-provided args are appended after the auto-generated ones.
// +optional
Args []string `json:"args,omitempty"`
// Env are additional environment variables for the engine container.
// +optional
Env []corev1.EnvVar `json:"env,omitempty"`
// Port is the port the inference engine listens on.
// When a routing proxy is enabled, this is the external port exposed by the proxy,
// and vLLM listens on routingProxy.targetPort instead.
// +kubebuilder:default=8000
// +kubebuilder:validation:Minimum=1
// +kubebuilder:validation:Maximum=65535
// +optional
Port int32 `json:"port,omitempty"`
}
EngineSpec configures the vLLM inference engine.
func (*EngineSpec) DeepCopy ¶ added in v0.8.22
func (in *EngineSpec) DeepCopy() *EngineSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EngineSpec.
func (*EngineSpec) DeepCopyInto ¶ added in v0.8.22
func (in *EngineSpec) DeepCopyInto(out *EngineSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type GatewayRef ¶ added in v0.8.22
// GatewayRef references a Gateway API Gateway.
type GatewayRef struct {
// Name of the Gateway.
// +kubebuilder:validation:MinLength=1
// +required
Name string `json:"name"`
// Namespace of the Gateway. If empty, defaults to the ModelService namespace.
// +optional
Namespace string `json:"namespace,omitempty"`
}
GatewayRef references a Gateway API Gateway.
func (*GatewayRef) DeepCopy ¶ added in v0.8.22
func (in *GatewayRef) DeepCopy() *GatewayRef
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GatewayRef.
func (*GatewayRef) DeepCopyInto ¶ added in v0.8.22
func (in *GatewayRef) DeepCopyInto(out *GatewayRef)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type HTTPRouteSpec ¶ added in v0.8.22
// HTTPRouteSpec configures a Gateway API HTTPRoute that routes traffic from a
// Gateway to the InferencePool backend.
type HTTPRouteSpec struct {
// GatewayRef references the Gateway to attach the HTTPRoute to.
// +required
GatewayRef GatewayRef `json:"gatewayRef"`
// Hostnames for the HTTPRoute. If empty, the route matches all hostnames.
// +optional
Hostnames []string `json:"hostnames,omitempty"`
}
HTTPRouteSpec configures a Gateway API HTTPRoute that routes traffic from a Gateway to the InferencePool backend.
func (*HTTPRouteSpec) DeepCopy ¶ added in v0.8.22
func (in *HTTPRouteSpec) DeepCopy() *HTTPRouteSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HTTPRouteSpec.
func (*HTTPRouteSpec) DeepCopyInto ¶ added in v0.8.22
func (in *HTTPRouteSpec) DeepCopyInto(out *HTTPRouteSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type HuggingFaceSource ¶
// HuggingFaceSource configures model retrieval from HuggingFace Hub.
//
// SECURITY: Model and Revision are passed to shell scripts. Both validation
// patterns therefore anchor the first character so that a value can never
// begin with "-", "." or "/".
type HuggingFaceSource struct {
	// Model is the HuggingFace model identifier (e.g. "microsoft/phi-4", "facebook/opt-125m").
	// Must contain only alphanumerics, dots, underscores, hyphens, and slashes.
	// +kubebuilder:validation:MinLength=1
	// +kubebuilder:validation:MaxLength=253
	// +kubebuilder:validation:Pattern="^[a-zA-Z0-9][a-zA-Z0-9._/-]*$"
	// +required
	Model string `json:"model"`
	// Revision pins a specific branch, tag, or commit hash.
	// If not specified, the default branch is used.
	// Must start with an alphanumeric character or underscore and contain only
	// alphanumerics, dots, underscores, hyphens, and slashes.
	// The leading-character anchor keeps the pattern consistent with MinLength=1
	// (the previous pattern also matched the empty string) and rejects
	// option-like values such as "--flag".
	// +kubebuilder:validation:MinLength=1
	// +kubebuilder:validation:MaxLength=253
	// +kubebuilder:validation:Pattern="^[a-zA-Z0-9_][a-zA-Z0-9._/-]*$"
	// +optional
	Revision string `json:"revision,omitempty"`
	// TokenSecretRef references a Secret containing the HuggingFace access token.
	// Required for private or gated repositories.
	// +optional
	TokenSecretRef *SecretKeySelector `json:"tokenSecretRef,omitempty"`
}
HuggingFaceSource configures model retrieval from HuggingFace Hub.
SECURITY: Model and Revision are passed to shell scripts. Only users who can create ModelArtifacts should have access; they already have equivalent privileges.
func (*HuggingFaceSource) DeepCopy ¶
func (in *HuggingFaceSource) DeepCopy() *HuggingFaceSource
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HuggingFaceSource.
func (*HuggingFaceSource) DeepCopyInto ¶
func (in *HuggingFaceSource) DeepCopyInto(out *HuggingFaceSource)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type InferencePoolSpec ¶ added in v0.8.22
// InferencePoolSpec configures the Gateway API Inference Extension InferencePool.
// The operator creates an InferencePool with selector labels matching the
// serving pods and target port matching the engine port. It also deploys and
// manages the Endpoint Picker (EPP) infrastructure: Deployment, Service,
// ConfigMap, ServiceAccount, RBAC, and optionally Istio DestinationRule.
type InferencePoolSpec struct {
// EndpointPicker configures the Endpoint Picker extension deployment.
// The operator creates and manages the EPP Deployment, Service, and
// supporting resources alongside the InferencePool.
// +required
EndpointPicker EndpointPickerSpec `json:"endpointPicker"`
}
InferencePoolSpec configures the Gateway API Inference Extension InferencePool. The operator creates an InferencePool with selector labels matching the serving pods and target port matching the engine port. It also deploys and manages the Endpoint Picker (EPP) infrastructure: Deployment, Service, ConfigMap, ServiceAccount, RBAC, and optionally Istio DestinationRule.
func (*InferencePoolSpec) DeepCopy ¶ added in v0.8.22
func (in *InferencePoolSpec) DeepCopy() *InferencePoolSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferencePoolSpec.
func (*InferencePoolSpec) DeepCopyInto ¶ added in v0.8.22
func (in *InferencePoolSpec) DeepCopyInto(out *InferencePoolSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type ModelArtifact ¶
// ModelArtifact is the Schema for the model artifacts API.
// A ModelArtifact declares intent to import a model from a source
// (e.g. HuggingFace), package it as an OCI artifact (ModelPack or ModelKit
// format), and push it to an OCI-compliant registry. The controller creates a
// Kubernetes Job to execute the import/pack/push pipeline and reports the
// resulting digest back to the status.
type ModelArtifact struct {
// TypeMeta is embedded inline, so its fields serialize at the top level of the object.
metav1.TypeMeta `json:",inline"`
// Standard object's metadata.
// +optional
metav1.ObjectMeta `json:"metadata,omitzero"`
// Spec defines the desired artifact.
// +required
Spec ModelArtifactSpec `json:"spec"`
// Status represents the current state of the artifact pipeline.
// +optional
Status ModelArtifactStatus `json:"status,omitzero"`
}
ModelArtifact is the Schema for the model artifacts API. A ModelArtifact declares intent to import a model from a source (e.g. HuggingFace), package it as an OCI artifact (ModelPack or ModelKit format), and push it to an OCI-compliant registry. The controller creates a Kubernetes Job to execute the import/pack/push pipeline and reports the resulting digest back to the status.
func (*ModelArtifact) DeepCopy ¶
func (in *ModelArtifact) DeepCopy() *ModelArtifact
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelArtifact.
func (*ModelArtifact) DeepCopyInto ¶
func (in *ModelArtifact) DeepCopyInto(out *ModelArtifact)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*ModelArtifact) DeepCopyObject ¶
func (in *ModelArtifact) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type ModelArtifactList ¶
// ModelArtifactList contains a list of ModelArtifact resources.
type ModelArtifactList struct {
// TypeMeta is embedded inline, so its fields serialize at the top level of the object.
metav1.TypeMeta `json:",inline"`
// ListMeta is the list-level metadata, serialized under "metadata".
metav1.ListMeta `json:"metadata,omitzero"`
// Items holds the ModelArtifact resources in this list.
Items []ModelArtifact `json:"items"`
}
ModelArtifactList contains a list of ModelArtifact resources.
func (*ModelArtifactList) DeepCopy ¶
func (in *ModelArtifactList) DeepCopy() *ModelArtifactList
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelArtifactList.
func (*ModelArtifactList) DeepCopyInto ¶
func (in *ModelArtifactList) DeepCopyInto(out *ModelArtifactList)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*ModelArtifactList) DeepCopyObject ¶
func (in *ModelArtifactList) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type ModelArtifactPhase ¶ added in v0.8.22
// ModelArtifactPhase represents the current lifecycle phase of a ModelArtifact.
// +enum
type ModelArtifactPhase string
ModelArtifactPhase represents the current lifecycle phase of a ModelArtifact. +enum
const ( // PhasePending indicates the pipeline has not yet started. PhasePending ModelArtifactPhase = "Pending" // PhaseRunning indicates the import/pack/push Job is in progress. PhaseRunning ModelArtifactPhase = "Running" // PhaseSucceeded indicates the artifact was successfully pushed to the registry. PhaseSucceeded ModelArtifactPhase = "Succeeded" // PhaseFailed indicates the pipeline encountered an error. PhaseFailed ModelArtifactPhase = "Failed" )
type ModelArtifactSpec ¶
// ModelArtifactSpec defines the desired state of a ModelArtifact.
// It declares the model source, target OCI registry, packaging format, and
// temporary storage for the import/pack/push pipeline.
type ModelArtifactSpec struct {
// Source defines where to fetch the model from.
// +required
Source ModelSource `json:"source"`
// Target defines the OCI registry destination for the packaged artifact.
// +required
Target OCITarget `json:"target"`
// Format specifies the OCI artifact packaging format.
// +kubebuilder:validation:Enum=ModelPack;ModelKit
// +kubebuilder:default=ModelPack
// +optional
Format PackFormat `json:"format,omitempty"`
// Storage configures the temporary PVC used during the import/pack/push pipeline.
// The PVC is automatically cleaned up after the job completes.
// +required
Storage StorageSpec `json:"storage"`
}
ModelArtifactSpec defines the desired state of a ModelArtifact. It declares the model source, target OCI registry, packaging format, and temporary storage for the import/pack/push pipeline.
func (*ModelArtifactSpec) DeepCopy ¶
func (in *ModelArtifactSpec) DeepCopy() *ModelArtifactSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelArtifactSpec.
func (*ModelArtifactSpec) DeepCopyInto ¶
func (in *ModelArtifactSpec) DeepCopyInto(out *ModelArtifactSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type ModelArtifactStatus ¶
// ModelArtifactStatus defines the observed state of a ModelArtifact.
type ModelArtifactStatus struct {
// ObservedGeneration is the most recent generation observed by the controller.
// +optional
ObservedGeneration int64 `json:"observedGeneration,omitempty"`
// Phase is the high-level summary of the artifact lifecycle.
// +optional
Phase ModelArtifactPhase `json:"phase,omitempty"`
// Digest is the OCI manifest digest of the pushed artifact (e.g. "sha256:abc123...").
// Only populated when Phase is Succeeded.
// +optional
Digest string `json:"digest,omitempty"`
// Reference is the full OCI reference of the pushed artifact including tag.
// +optional
Reference string `json:"reference,omitempty"`
// JobRef references the most recently created Job for this artifact.
// +optional
JobRef *ResourceReference `json:"jobRef,omitempty"`
// StartTime is the timestamp when the most recent job was created.
// +optional
StartTime *metav1.Time `json:"startTime,omitempty"`
// CompletionTime is the timestamp when the most recent job completed (succeeded or failed).
// +optional
CompletionTime *metav1.Time `json:"completionTime,omitempty"`
// Conditions store the status conditions of the ModelArtifact.
// +listType=map
// +listMapKey=type
// +optional
Conditions []metav1.Condition `json:"conditions,omitempty"`
}
ModelArtifactStatus defines the observed state of a ModelArtifact.
func (*ModelArtifactStatus) DeepCopy ¶
func (in *ModelArtifactStatus) DeepCopy() *ModelArtifactStatus
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelArtifactStatus.
func (*ModelArtifactStatus) DeepCopyInto ¶
func (in *ModelArtifactStatus) DeepCopyInto(out *ModelArtifactStatus)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type ModelService ¶ added in v0.8.22
// ModelService is the Schema for the modelservices API.
// A ModelService declares intent to serve an OCI-packaged model using vLLM,
// optionally with Prefill/Decode disaggregation and Gateway API integration.
// The model artifact is mounted via Kubernetes image volumes
// (requires K8s >= 1.35), eliminating the need for init containers or
// PVC-based model loading.
type ModelService struct {
// TypeMeta is embedded inline, so its fields serialize at the top level of the object.
metav1.TypeMeta `json:",inline"`
// Standard object's metadata.
// +optional
metav1.ObjectMeta `json:"metadata,omitzero"`
// Spec defines the desired model serving configuration.
// +required
Spec ModelServiceSpec `json:"spec"`
// Status represents the current state of the model serving deployment.
// +optional
Status ModelServiceStatus `json:"status,omitzero"`
}
ModelService is the Schema for the modelservices API. A ModelService declares intent to serve an OCI-packaged model using vLLM, optionally with Prefill/Decode disaggregation and Gateway API integration. The model artifact is mounted via Kubernetes image volumes (requires K8s >= 1.35), eliminating the need for init containers or PVC-based model loading.
func (*ModelService) DeepCopy ¶ added in v0.8.22
func (in *ModelService) DeepCopy() *ModelService
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelService.
func (*ModelService) DeepCopyInto ¶ added in v0.8.22
func (in *ModelService) DeepCopyInto(out *ModelService)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*ModelService) DeepCopyObject ¶ added in v0.8.22
func (in *ModelService) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type ModelServiceList ¶ added in v0.8.22
// ModelServiceList contains a list of ModelService resources.
type ModelServiceList struct {
// TypeMeta is embedded inline, so its fields serialize at the top level of the object.
metav1.TypeMeta `json:",inline"`
// ListMeta is the list-level metadata, serialized under "metadata".
metav1.ListMeta `json:"metadata,omitzero"`
// Items holds the ModelService resources in this list.
Items []ModelService `json:"items"`
}
ModelServiceList contains a list of ModelService resources.
func (*ModelServiceList) DeepCopy ¶ added in v0.8.22
func (in *ModelServiceList) DeepCopy() *ModelServiceList
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelServiceList.
func (*ModelServiceList) DeepCopyInto ¶ added in v0.8.22
func (in *ModelServiceList) DeepCopyInto(out *ModelServiceList)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*ModelServiceList) DeepCopyObject ¶ added in v0.8.22
func (in *ModelServiceList) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type ModelServicePhase ¶ added in v0.8.22
// ModelServicePhase represents the current lifecycle phase of a ModelService.
// +enum
type ModelServicePhase string
ModelServicePhase represents the current lifecycle phase of a ModelService. +enum
const ( // ModelServicePending indicates the serving resources are being created. ModelServicePending ModelServicePhase = "Pending" // ModelServiceRunning indicates the serving pods are running but not yet fully ready. ModelServiceRunning ModelServicePhase = "Running" // ModelServiceReady indicates all desired replicas are ready and serving traffic. ModelServiceReady ModelServicePhase = "Ready" // ModelServiceFailed indicates one or more components have failed. ModelServiceFailed ModelServicePhase = "Failed" )
type ModelServiceSpec ¶ added in v0.8.22
// ModelServiceSpec defines the desired state of a ModelService.
// It declares how to serve an OCI-packaged model using vLLM with optional
// Prefill/Decode disaggregation and Gateway API Inference Extension integration.
type ModelServiceSpec struct {
// Model defines the OCI model artifact and serving identity.
// +required
Model ModelSpec `json:"model"`
// Engine configures the inference engine (vLLM).
// +required
Engine EngineSpec `json:"engine"`
// Accelerator configures the GPU/accelerator type for serving pods.
// +required
Accelerator AcceleratorSpec `json:"accelerator"`
// Decode configures the decode (or unified) serving pods.
// In non-disaggregated mode, these are the only serving pods.
// +required
Decode RoleSpec `json:"decode"`
// Prefill optionally configures separate prefill pods for disaggregated serving.
// When set, the serving architecture splits into Prefill (prompt processing)
// and Decode (token generation) phases on separate pod groups.
// +optional
Prefill *RoleSpec `json:"prefill,omitempty"`
// RoutingProxy configures the llm-d routing sidecar for disaggregated serving.
// Required when Prefill is set; the proxy routes prefill requests between pods.
// +optional
RoutingProxy *RoutingProxySpec `json:"routingProxy,omitempty"`
// InferencePool configures the Gateway API Inference Extension InferencePool.
// When set, the operator creates and manages an InferencePool resource
// with selector labels matching the serving pods.
// +optional
InferencePool *InferencePoolSpec `json:"inferencePool,omitempty"`
// HTTPRoute optionally creates a Gateway API HTTPRoute pointing to the InferencePool.
// Requires InferencePool to be set.
// +optional
HTTPRoute *HTTPRouteSpec `json:"httpRoute,omitempty"`
// Monitoring configures observability features.
// +optional
Monitoring *MonitoringSpec `json:"monitoring,omitempty"`
}
ModelServiceSpec defines the desired state of a ModelService. It declares how to serve an OCI-packaged model using vLLM with optional Prefill/Decode disaggregation and Gateway API Inference Extension integration.
func (*ModelServiceSpec) DeepCopy ¶ added in v0.8.22
func (in *ModelServiceSpec) DeepCopy() *ModelServiceSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelServiceSpec.
func (*ModelServiceSpec) DeepCopyInto ¶ added in v0.8.22
func (in *ModelServiceSpec) DeepCopyInto(out *ModelServiceSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type ModelServiceStatus ¶ added in v0.8.22
// ModelServiceStatus defines the observed state of a ModelService.
type ModelServiceStatus struct {
// ObservedGeneration is the most recent generation observed by the controller.
// +optional
ObservedGeneration int64 `json:"observedGeneration,omitempty"`
// Phase is the high-level summary of the ModelService lifecycle.
// +optional
Phase ModelServicePhase `json:"phase,omitempty"`
// DecodeReady is the number of ready decode replicas.
// +optional
DecodeReady int32 `json:"decodeReady,omitempty"`
// DecodeReplicas is the desired number of decode replicas.
// +optional
DecodeReplicas int32 `json:"decodeReplicas,omitempty"`
// PrefillReady is the number of ready prefill replicas.
// +optional
PrefillReady int32 `json:"prefillReady,omitempty"`
// PrefillReplicas is the desired number of prefill replicas.
// +optional
PrefillReplicas int32 `json:"prefillReplicas,omitempty"`
// Conditions store the status conditions of the ModelService.
// +listType=map
// +listMapKey=type
// +optional
Conditions []metav1.Condition `json:"conditions,omitempty"`
}
ModelServiceStatus defines the observed state of a ModelService.
func (*ModelServiceStatus) DeepCopy ¶ added in v0.8.22
func (in *ModelServiceStatus) DeepCopy() *ModelServiceStatus
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelServiceStatus.
func (*ModelServiceStatus) DeepCopyInto ¶ added in v0.8.22
func (in *ModelServiceStatus) DeepCopyInto(out *ModelServiceStatus)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type ModelSource ¶
// ModelSource defines the origin of the model to be packaged.
// Exactly one source type must be specified; the type's documentation declares
// an XValidation rule with the message "exactly one source must be specified".
// HuggingFace is the only source field declared here.
type ModelSource struct {
// HuggingFace specifies a HuggingFace Hub repository as the model source.
// +optional
HuggingFace *HuggingFaceSource `json:"huggingFace,omitempty"`
}
ModelSource defines the origin of the model to be packaged. Exactly one source type must be specified. +kubebuilder:validation:XValidation:rule="[has(self.huggingFace)].filter(x, x).size() == 1",message="exactly one source must be specified"
func (*ModelSource) DeepCopy ¶
func (in *ModelSource) DeepCopy() *ModelSource
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelSource.
func (*ModelSource) DeepCopyInto ¶
func (in *ModelSource) DeepCopyInto(out *ModelSource)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type ModelSpec ¶ added in v0.8.22
// ModelSpec defines the OCI model artifact to serve.
// The model is mounted as a read-only Kubernetes image volume
// (requires K8s >= 1.35).
type ModelSpec struct {
// Name is the model identifier used in OpenAI-compatible API requests
// (e.g. "qwen/Qwen3-32B", "meta-llama/Llama-3-70B-Instruct").
// This is passed to vLLM as --served-model-name.
// +kubebuilder:validation:MinLength=1
// +kubebuilder:validation:MaxLength=253
// +required
Name string `json:"name"`
// Image is the OCI reference for the model artifact.
// It is used as a Kubernetes image volume source, leveraging node-level
// container image caching for efficient model distribution.
// Example: "registry.example.com/models/qwen3-32b:v1"
// +kubebuilder:validation:MinLength=1
// +required
Image string `json:"image"`
// MountPath is where the model artifact is mounted in serving containers.
// The vLLM --model argument is automatically set to this path.
// +kubebuilder:default="/models"
// +optional
MountPath string `json:"mountPath,omitempty"`
// ImagePullSecrets for pulling the model OCI artifact from a private registry.
// +optional
ImagePullSecrets []corev1.LocalObjectReference `json:"imagePullSecrets,omitempty"`
}
ModelSpec defines the OCI model artifact to serve. The model is mounted as a read-only Kubernetes image volume (requires K8s >= 1.35).
func (*ModelSpec) DeepCopy ¶ added in v0.8.22
func (in *ModelSpec) DeepCopy() *ModelSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelSpec.
func (*ModelSpec) DeepCopyInto ¶ added in v0.8.22
func (in *ModelSpec) DeepCopyInto(out *ModelSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type MonitoringSpec ¶ added in v0.8.22
// MonitoringSpec configures observability features.
type MonitoringSpec struct {
// PodMonitor configures Prometheus PodMonitor creation for serving pods.
// +optional
PodMonitor *PodMonitorSpec `json:"podMonitor,omitempty"`
}
MonitoringSpec configures observability features.
func (*MonitoringSpec) DeepCopy ¶ added in v0.8.22
func (in *MonitoringSpec) DeepCopy() *MonitoringSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MonitoringSpec.
func (*MonitoringSpec) DeepCopyInto ¶ added in v0.8.22
func (in *MonitoringSpec) DeepCopyInto(out *MonitoringSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type OCITarget ¶
// OCITarget defines the destination OCI registry for the packaged artifact.
//
// SECURITY: Registry, Repository, and Tag are passed to shell scripts. Only
// users who can create ModelArtifacts should have access; they already have
// equivalent privileges.
type OCITarget struct {
// Registry is the OCI registry host, optionally with port (e.g. "ghcr.io", "registry.local:5001").
// Must not contain slashes — the path component belongs in Repository.
// +kubebuilder:validation:MinLength=1
// +kubebuilder:validation:MaxLength=253
// +kubebuilder:validation:Pattern="^[a-zA-Z0-9][a-zA-Z0-9._:-]*$"
// +required
Registry string `json:"registry"`
// Repository is the OCI repository path within the registry (e.g. "myorg/models/phi-4", "facebook/opt-125m").
// Must not include the registry host. Must contain only alphanumerics, dots, underscores, hyphens, and slashes.
// +kubebuilder:validation:MinLength=1
// +kubebuilder:validation:MaxLength=253
// +kubebuilder:validation:Pattern="^[a-zA-Z0-9][a-zA-Z0-9._/-]*$"
// +required
Repository string `json:"repository"`
// Tag is the image tag to push. Defaults to "latest" if not specified.
// Must contain only alphanumerics, dots, underscores, and hyphens.
// +kubebuilder:validation:MinLength=1
// +kubebuilder:validation:MaxLength=128
// +kubebuilder:validation:Pattern="^[a-zA-Z0-9][a-zA-Z0-9._-]*$"
// +optional
Tag string `json:"tag,omitempty"`
// CredentialsSecretRef references a Secret containing OCI registry credentials.
// The Secret must contain "username" and "password" keys.
// +optional
CredentialsSecretRef *SecretReference `json:"credentialsSecretRef,omitempty"`
// Insecure uses an unencrypted connection to the registry instead of TLS.
// Only use for development or air-gapped environments.
// +optional
Insecure bool `json:"insecure,omitempty"`
}
OCITarget defines the destination OCI registry for the packaged artifact.
SECURITY: Registry, Repository, and Tag are passed to shell scripts. Only users who can create ModelArtifacts should have access; they already have equivalent privileges.
func (*OCITarget) DeepCopy ¶
func (in *OCITarget) DeepCopy() *OCITarget
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new OCITarget.
func (*OCITarget) DeepCopyInto ¶
func (in *OCITarget) DeepCopyInto(out *OCITarget)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type PackFormat ¶
// PackFormat specifies the OCI artifact packaging format.
// +enum
type PackFormat string
PackFormat specifies the OCI artifact packaging format. +enum
const ( // PackFormatModelPack produces a CNCF ModelPack compliant OCI artifact. PackFormatModelPack PackFormat = "ModelPack" // PackFormatModelKit produces a KitOps native ModelKit OCI artifact. PackFormatModelKit PackFormat = "ModelKit" )
type ParallelismSpec ¶ added in v0.8.22
// ParallelismSpec configures vLLM tensor and data parallelism.
type ParallelismSpec struct {
// Tensor is the tensor-parallel-size: number of GPUs used to shard a single model.
// +kubebuilder:default=1
// +kubebuilder:validation:Minimum=1
// +optional
Tensor int32 `json:"tensor,omitempty"`
// Data is the data-parallel-size: number of data-parallel replicas within a single pod.
// +kubebuilder:default=1
// +kubebuilder:validation:Minimum=1
// +optional
Data int32 `json:"data,omitempty"`
// DataLocal is the data-parallel-size-local for disaggregated serving.
// +kubebuilder:default=1
// +kubebuilder:validation:Minimum=1
// +optional
DataLocal int32 `json:"dataLocal,omitempty"`
}
ParallelismSpec configures vLLM tensor and data parallelism.
func (*ParallelismSpec) DeepCopy ¶ added in v0.8.22
func (in *ParallelismSpec) DeepCopy() *ParallelismSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ParallelismSpec.
func (*ParallelismSpec) DeepCopyInto ¶ added in v0.8.22
func (in *ParallelismSpec) DeepCopyInto(out *ParallelismSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type PodMonitorSpec ¶ added in v0.8.22
// PodMonitorSpec configures Prometheus PodMonitor resources.
type PodMonitorSpec struct {
// Enabled controls PodMonitor creation.
// +kubebuilder:default=false
// +optional
Enabled bool `json:"enabled,omitempty"`
// PortName to scrape metrics from. Must match a named port on the vLLM container.
// +kubebuilder:default="http"
// +optional
PortName string `json:"portName,omitempty"`
// Path is the HTTP endpoint to scrape metrics from.
// +kubebuilder:default="/metrics"
// +optional
Path string `json:"path,omitempty"`
// Interval between scrapes.
// +kubebuilder:default="30s"
// +optional
Interval string `json:"interval,omitempty"`
}
PodMonitorSpec configures Prometheus PodMonitor resources.
func (*PodMonitorSpec) DeepCopy ¶ added in v0.8.22
func (in *PodMonitorSpec) DeepCopy() *PodMonitorSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PodMonitorSpec.
func (*PodMonitorSpec) DeepCopyInto ¶ added in v0.8.22
func (in *PodMonitorSpec) DeepCopyInto(out *PodMonitorSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type ResourceReference ¶
// ResourceReference is a lightweight reference to a namespaced Kubernetes resource.
type ResourceReference struct {
// Name is the name of the referenced resource.
// +required
Name string `json:"name"`
// Namespace is the namespace of the referenced resource.
// +optional
Namespace string `json:"namespace,omitempty"`
}
ResourceReference is a lightweight reference to a namespaced Kubernetes resource.
func (*ResourceReference) DeepCopy ¶
func (in *ResourceReference) DeepCopy() *ResourceReference
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ResourceReference.
func (*ResourceReference) DeepCopyInto ¶
func (in *ResourceReference) DeepCopyInto(out *ResourceReference)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type RoleSpec ¶ added in v0.8.22
// RoleSpec configures a group of serving pods (decode or prefill).
type RoleSpec struct {
// Replicas is the desired number of pod replicas.
// +kubebuilder:default=1
// +kubebuilder:validation:Minimum=0
// +optional
Replicas *int32 `json:"replicas,omitempty"`
// Parallelism configures vLLM tensor/data parallelism for this role.
// +optional
Parallelism ParallelismSpec `json:"parallelism,omitempty"`
// Resources for the vLLM container (CPU, memory).
// GPU resources are automatically calculated from accelerator type
// and parallelism (tensor * dataLocal) and should not be set manually.
// +optional
Resources corev1.ResourceRequirements `json:"resources,omitempty"`
// NodeSelector constrains pods to nodes with matching labels.
// +optional
NodeSelector map[string]string `json:"nodeSelector,omitempty"`
// Tolerations for the serving pods.
// +optional
Tolerations []corev1.Toleration `json:"tolerations,omitempty"`
}
RoleSpec configures a group of serving pods (decode or prefill).
func (*RoleSpec) DeepCopy ¶ added in v0.8.22
func (in *RoleSpec) DeepCopy() *RoleSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RoleSpec.
func (*RoleSpec) DeepCopyInto ¶ added in v0.8.22
func (in *RoleSpec) DeepCopyInto(out *RoleSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type RoutingProxySpec ¶ added in v0.8.22
// RoutingProxySpec configures the llm-d routing sidecar.
// The proxy runs as a native sidecar (restartable init container) that
// intercepts incoming requests and handles prefill/decode routing for
// disaggregated serving. All fields are optional.
type RoutingProxySpec struct {
// Image is the routing proxy container image.
// Defaults to "ghcr.io/llm-d/llm-d-routing-sidecar:v0.6.0".
// +kubebuilder:default="ghcr.io/llm-d/llm-d-routing-sidecar:v0.6.0"
// +optional
Image string `json:"image,omitempty"`
// Connector specifies the KV-cache transfer protocol.
// Defaults to "nixlv2".
// +kubebuilder:default="nixlv2"
// +optional
Connector string `json:"connector,omitempty"`
// TargetPort is the port where vLLM actually listens when the proxy is enabled.
// The proxy intercepts on engine.port and forwards to this port.
// Defaults to 8200; accepted values are 1 through 65535.
// +kubebuilder:default=8200
// +kubebuilder:validation:Minimum=1
// +kubebuilder:validation:Maximum=65535
// +optional
TargetPort int32 `json:"targetPort,omitempty"`
// ZapEncoder sets the Zap log encoding format (e.g. "json", "console").
// No default is declared on this field.
// +optional
ZapEncoder string `json:"zapEncoder,omitempty"`
// ZapLogLevel sets the Zap log level (e.g. "debug", "info", "error").
// No default is declared on this field.
// +optional
ZapLogLevel string `json:"zapLogLevel,omitempty"`
// SecureProxy enables TLS on the routing proxy.
// It is a pointer, so an unset value is distinguishable from an explicit false.
// +optional
SecureProxy *bool `json:"secureProxy,omitempty"`
// PrefillerUseTLS enables TLS for prefiller communication.
// It is a pointer, so an unset value is distinguishable from an explicit false.
// +optional
PrefillerUseTLS *bool `json:"prefillerUseTLS,omitempty"`
// CertPath is the path to TLS certificates for the routing proxy.
// +optional
CertPath string `json:"certPath,omitempty"`
}
RoutingProxySpec configures the llm-d routing sidecar. The proxy runs as a native sidecar (restartable init container) that intercepts incoming requests and handles prefill/decode routing for disaggregated serving.
func (*RoutingProxySpec) DeepCopy ¶ added in v0.8.22
func (in *RoutingProxySpec) DeepCopy() *RoutingProxySpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RoutingProxySpec.
func (*RoutingProxySpec) DeepCopyInto ¶ added in v0.8.22
func (in *RoutingProxySpec) DeepCopyInto(out *RoutingProxySpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type SecretKeySelector ¶
// SecretKeySelector references a specific key within a Secret.
type SecretKeySelector struct {
// Name is the name of the Secret in the same namespace as the ModelArtifact.
// This field is required and must be at least one character long.
// +kubebuilder:validation:MinLength=1
// +required
Name string `json:"name"`
// Key is the key within the Secret data. If omitted, defaults to "token".
// +optional
Key string `json:"key,omitempty"`
}
SecretKeySelector references a specific key within a Secret.
func (*SecretKeySelector) DeepCopy ¶
func (in *SecretKeySelector) DeepCopy() *SecretKeySelector
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SecretKeySelector.
func (*SecretKeySelector) DeepCopyInto ¶
func (in *SecretKeySelector) DeepCopyInto(out *SecretKeySelector)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type SecretReference ¶
// SecretReference references a Secret by name.
// Used when the Secret structure is fixed by convention
// (e.g. "username" and "password" keys for OCI credentials).
type SecretReference struct {
// Name is the name of the Secret in the same namespace as the ModelArtifact.
// This field is required and must be at least one character long.
// +kubebuilder:validation:MinLength=1
// +required
Name string `json:"name"`
}
SecretReference references a Secret by name. Used when the Secret structure is fixed by convention (e.g. "username" and "password" keys for OCI credentials).
func (*SecretReference) DeepCopy ¶
func (in *SecretReference) DeepCopy() *SecretReference
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SecretReference.
func (*SecretReference) DeepCopyInto ¶
func (in *SecretReference) DeepCopyInto(out *SecretReference)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type StorageSpec ¶
// StorageSpec configures the temporary PVC for the import/pack/push pipeline.
type StorageSpec struct {
// Size is the requested PVC storage capacity (e.g. "100Gi").
// Should be at least 2x the expected model size to accommodate both
// the downloaded files and the packed artifact.
// This field is required.
// +required
Size resource.Quantity `json:"size"`
// StorageClassName overrides the cluster default StorageClass.
// If not specified, the cluster default StorageClass is used.
// +optional
StorageClassName *string `json:"storageClassName,omitempty"`
}
StorageSpec configures the temporary PVC for the import/pack/push pipeline.
func (*StorageSpec) DeepCopy ¶
func (in *StorageSpec) DeepCopy() *StorageSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new StorageSpec.
func (*StorageSpec) DeepCopyInto ¶
func (in *StorageSpec) DeepCopyInto(out *StorageSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.