Documentation
¶
Index ¶
- Constants
- func ApplyImageDefaults(ms *modelv1alpha1.ModelService, defaults DefaultImages)
- func BuildDefaultHTTPRoute(ms *modelv1alpha1.ModelService, metadataLabels map[string]string, ...) *gatewayv1.HTTPRoute
- func BuildDefaultInferencePool(ms *modelv1alpha1.ModelService, metadataLabels map[string]string) *inferenceextv1.InferencePool
- func BuildDeployment(ms *modelv1alpha1.ModelService, role *modelv1alpha1.RoleSpec, roleName string, ...) *appsv1.Deployment
- func BuildDestinationRule(ms *modelv1alpha1.ModelService, metadataLabels map[string]string) *istionetworkingv1beta1.DestinationRule
- func BuildEPPClusterRBAC(ms *modelv1alpha1.ModelService, metadataLabels map[string]string) (*rbacv1.ClusterRole, *rbacv1.ClusterRoleBinding)
- func BuildEPPConfigMap(ms *modelv1alpha1.ModelService, metadataLabels map[string]string) *corev1.ConfigMap
- func BuildEPPDeployment(ms *modelv1alpha1.ModelService, eppConfig EPPConfig, ...) *appsv1.Deployment
- func BuildEPPRBAC(ms *modelv1alpha1.ModelService, metadataLabels map[string]string, ...) (*rbacv1.Role, *rbacv1.RoleBinding)
- func BuildEPPSATokenSecret(ms *modelv1alpha1.ModelService, metadataLabels map[string]string) *corev1.Secret
- func BuildEPPService(ms *modelv1alpha1.ModelService, metadataLabels map[string]string, ...) *corev1.Service
- func BuildEPPServiceAccount(ms *modelv1alpha1.ModelService, metadataLabels map[string]string) *corev1.ServiceAccount
- func BuildEPPServiceMonitor(ms *modelv1alpha1.ModelService, eppConfig EPPConfig, ...) *monitoringv1.ServiceMonitor
- func BuildHTTPRoute(ms *modelv1alpha1.ModelService, metadataLabels map[string]string) *gatewayv1.HTTPRoute
- func BuildInferencePool(ms *modelv1alpha1.ModelService, metadataLabels map[string]string) *inferenceextv1.InferencePool
- func BuildPodMonitor(ms *modelv1alpha1.ModelService, roleName string, ...) *monitoringv1.PodMonitor
- func CleanupEPPClusterRBAC(ctx context.Context, c client.Client, namespace, msName string) error
- func ConfigMapHash(ms *modelv1alpha1.ModelService) string
- func DecodeName(msName string) string
- func DefaultReplicas(replicas *int32) int32
- func EPPClusterRBACName(namespace, msName string) string
- func EPPConfigMapName(msName string) string
- func EPPLabels(msName, version string) map[string]string
- func EPPName(msName string) string
- func EPPNameForService(msName string) string
- func EPPSecretName(msName string) string
- func EPPSelectorLabels(msName string) map[string]string
- func EPPServiceMonitorName(msName string) string
- func EnsureDecodeDeployment(ctx context.Context, c client.Client, scheme *runtime.Scheme, ...) error
- func EnsureDestinationRule(ctx context.Context, c client.Client, scheme *runtime.Scheme, ...) error
- func EnsureEPPClusterRBAC(ctx context.Context, c client.Client, ms *modelv1alpha1.ModelService, ...) error
- func EnsureEPPConfigMap(ctx context.Context, c client.Client, scheme *runtime.Scheme, ...) error
- func EnsureEPPDeployment(ctx context.Context, c client.Client, scheme *runtime.Scheme, ...) error
- func EnsureEPPRBAC(ctx context.Context, c client.Client, scheme *runtime.Scheme, ...) error
- func EnsureEPPSATokenSecret(ctx context.Context, c client.Client, scheme *runtime.Scheme, ...) error
- func EnsureEPPService(ctx context.Context, c client.Client, scheme *runtime.Scheme, ...) error
- func EnsureEPPServiceAccount(ctx context.Context, c client.Client, scheme *runtime.Scheme, ...) error
- func EnsureEPPServiceMonitor(ctx context.Context, c client.Client, scheme *runtime.Scheme, ...) error
- func EnsureHTTPRoute(ctx context.Context, c client.Client, scheme *runtime.Scheme, ...) error
- func EnsureInferencePool(ctx context.Context, c client.Client, scheme *runtime.Scheme, ...) error
- func EnsurePodMonitors(ctx context.Context, c client.Client, scheme *runtime.Scheme, ...) error
- func EnsurePrefillDeployment(ctx context.Context, c client.Client, scheme *runtime.Scheme, ...) error
- func GPUCount(p modelv1alpha1.ParallelismSpec) int32
- func GPUResourceName(accel modelv1alpha1.AcceleratorType) corev1.ResourceName
- func HTTPRouteLabels(msName, version string) map[string]string
- func HTTPRouteName(msName string) string
- func InferencePoolLabels(msName, version string) map[string]string
- func InferencePoolName(msName string) string
- func InferencePoolSelectorLabels(msName string) map[string]string
- func InjectGPUResources(res *corev1.ResourceRequirements, accel modelv1alpha1.AcceleratorType, ...)
- func LabelsForRole(msName, component, version string) map[string]string
- func PluginsConfigFile(ms *modelv1alpha1.ModelService) string
- func PodLabelsForRole(msName, component, version, role string) map[string]string
- func PodMonitorName(msName, role string) string
- func PrefillName(msName string) string
- func SanitizeDNS1035Label(s string) string
- func SelectorLabelsForRole(msName, component, role string) map[string]string
- type DefaultImages
- type EPPConfig
- type ObservationResult
- type TracingConfig
Constants ¶
const ( // ModelTmpVolumeName is the name of the emptyDir volume used by the model-unpack // init container for OCI pull cache (kit cache / scratch). ModelTmpVolumeName = "model-tmp" // ModelTmpMountPath is the mount path for the OCI cache inside the init container. // KITOPS_HOME is set to this path so kit stores its cache here. ModelTmpMountPath = "/tmp/model-oci" )
const ( ProviderIstio = "istio" ProviderGKE = "gke" ProviderNone = "none" )
const ( DefaultPluginsConfigFile = "default-plugins.yaml" PDPluginsConfigFile = "pd-config.yaml" )
const ( ConditionTypeReady = "Ready" ComponentDecode = "model-decode" ComponentPrefill = "model-prefill" ComponentEPP = "epp" ModelVolumeName = "model" // DockerConfigVolumeName is the name of the volume that mounts OCI registry credentials // (when spec.model.imagePullSecrets is set). Aligned with modelartifact for consistency. DockerConfigVolumeName = "docker-config" // DockerConfigMountPath is the mount path for the Docker config inside the Pod (config at config.json). DockerConfigMountPath = "/.docker" LabelRole = "llm-d.ai/role" LabelInferenceServer = "llm-d.ai/inference-serving" LabelInferenceServing = "llm-d.ai/inferenceServing" LabelModel = "llm-d.ai/model" // LabelInferencePool is the Pod label key for the EPP deployment name (value = EPPName(msName)). LabelInferencePool = "inferencepool" LabelValueTrue = "true" RoleDecode = "decode" RolePrefill = "prefill" FinalizerClusterRBAC = "model.otterscale.io/epp-cluster-rbac" )
const DefaultEPPExtProcPort = int32(9002)
DefaultEPPExtProcPort is the default gRPC extProc port when InferencePool is not set.
const DefaultGatewayGroup = "gateway.networking.k8s.io"
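DefaultGatewayGroup is the API group for Gateway parentRef in HTTPRoute.
Note (doc comment that belongs to BuildDefaultHTTPRoute but is attached to this constant in the source): BuildDefaultHTTPRoute constructs an HTTPRoute when spec.httpRoute is not set. It attaches to the given gateway and routes to the InferencePool named poolName (EPPName(ms.Name) for default pool, or InferencePoolName(ms.Name) for explicit). NOTE(review): the docs for BuildDefaultInferencePool and EnsureInferencePool state that the default pool is named InferencePoolName(ms.Name) (no -epp suffix); confirm which pool name is current.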
const DefaultGatewayKind = "Gateway"
DefaultGatewayKind is the kind for Gateway parentRef in HTTPRoute.
const DefaultGatewayNamespace = "llm-d"
DefaultGatewayNamespace is the namespace of the Gateway in default HTTPRoute parentRefs.
const HeaderOtterScaleModelName = "OtterScale-Model-Name"
HeaderOtterScaleModelName is the HTTP header name used to match the model (value = ModelService name).
const PoolGroupAPIGroup = "inference.networking.k8s.io"
PoolGroupAPIGroup is the value for the EPP --pool-group argument (InferencePool API group).
Variables ¶
This section is empty.
Functions ¶
func ApplyImageDefaults ¶
func ApplyImageDefaults(ms *modelv1alpha1.ModelService, defaults DefaultImages)
ApplyImageDefaults fills in empty image fields on the ModelService with the operator-level defaults. The caller should pass a DeepCopy of the CR to avoid mutating the original API object.
func BuildDefaultHTTPRoute ¶
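func BuildDefaultHTTPRoute( ms *modelv1alpha1.ModelService, metadataLabels map[string]string, gatewayName string, poolName string, ) *gatewayv1.HTTPRoute
BuildDefaultHTTPRoute constructs an HTTPRoute when spec.httpRoute is not set. It attaches to the Gateway named gatewayName and routes to the InferencePool named poolName.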
func BuildDefaultInferencePool ¶
func BuildDefaultInferencePool( ms *modelv1alpha1.ModelService, metadataLabels map[string]string, ) *inferenceextv1.InferencePool
BuildDefaultInferencePool constructs a default InferencePool when ms.Spec.InferencePool is nil. The pool name is InferencePoolName(ms.Name) (no -epp suffix) so it matches the default HTTPRoute backendRef.name and the EPP --pool-name flag.
func BuildDeployment ¶
func BuildDeployment( ms *modelv1alpha1.ModelService, role *modelv1alpha1.RoleSpec, roleName string, deployName string, podLabels map[string]string, metadataLabels map[string]string, selectorLabels map[string]string, tracing TracingConfig, kitImage string, ) *appsv1.Deployment
BuildDeployment constructs an apps/v1 Deployment for a serving role (decode or prefill).
The model artifact is provisioned by an init container that pulls the OCI ModelPack from spec.model.image into a tmp emptyDir and runs "kit unpack" into spec.model.mountPath. The main vLLM container mounts the unpacked model read-only. GPU resources are injected automatically based on accelerator type and parallelism.
func BuildDestinationRule ¶
func BuildDestinationRule( ms *modelv1alpha1.ModelService, metadataLabels map[string]string, ) *istionetworkingv1beta1.DestinationRule
BuildDestinationRule constructs an Istio DestinationRule for the EPP Service.
The rule enables TLS SIMPLE mode with insecureSkipVerify so the Istio sidecar can communicate with the EPP service over TLS without needing a custom CA cert. (SIMPLE is one-way TLS origination; no client certificate is presented, so this is not mutual TLS.)
func BuildEPPClusterRBAC ¶
func BuildEPPClusterRBAC( ms *modelv1alpha1.ModelService, metadataLabels map[string]string, ) (*rbacv1.ClusterRole, *rbacv1.ClusterRoleBinding)
BuildEPPClusterRBAC constructs a ClusterRole and ClusterRoleBinding that grant the EPP ServiceAccount permissions for metrics authentication:
- tokenreviews (authenticate bearer tokens from Prometheus)
- subjectaccessreviews (authorise metrics scraping)
- /metrics non-resource URL access
These are cluster-scoped and cannot carry an OwnerReference to the namespace-scoped ModelService; the controller uses a Finalizer to clean them up.
func BuildEPPConfigMap ¶
func BuildEPPConfigMap( ms *modelv1alpha1.ModelService, metadataLabels map[string]string, ) *corev1.ConfigMap
BuildEPPConfigMap constructs the ConfigMap for the EPP plugins configuration.
Both modes produce an explicit config file so the operator fully controls the scheduling behaviour regardless of what the EPP image ships as built-in defaults.
func BuildEPPDeployment ¶
func BuildEPPDeployment( ms *modelv1alpha1.ModelService, eppConfig EPPConfig, metadataLabels map[string]string, selectorLabels map[string]string, configHash string, ) *appsv1.Deployment
BuildEPPDeployment constructs the EPP Deployment.
func BuildEPPRBAC ¶
func BuildEPPRBAC( ms *modelv1alpha1.ModelService, metadataLabels map[string]string, replicas int32, ) (*rbacv1.Role, *rbacv1.RoleBinding)
BuildEPPRBAC constructs the Role and RoleBinding for the EPP ServiceAccount.
The EPP needs:
- Read access to pods (endpoint selection)
- Read access to InferencePool (GA API)
- Read access to InferenceObjective / InferenceModelRewrite (GAIE v1alpha2)
- When replicas > 1: leases + events for leader election
func BuildEPPSATokenSecret ¶
func BuildEPPSATokenSecret( ms *modelv1alpha1.ModelService, metadataLabels map[string]string, ) *corev1.Secret
BuildEPPSATokenSecret constructs a ServiceAccountToken Secret for the EPP. This token is used by Prometheus to authenticate when scraping metrics.
func BuildEPPService ¶
func BuildEPPService( ms *modelv1alpha1.ModelService, metadataLabels map[string]string, selectorLabels map[string]string, ) *corev1.Service
BuildEPPService constructs the Service exposing the EPP's extProc and metrics ports. Safe to call when ms.Spec.InferencePool is nil (uses default port).
func BuildEPPServiceAccount ¶
func BuildEPPServiceAccount( ms *modelv1alpha1.ModelService, metadataLabels map[string]string, ) *corev1.ServiceAccount
BuildEPPServiceAccount constructs the ServiceAccount for the EPP.
func BuildEPPServiceMonitor ¶
func BuildEPPServiceMonitor( ms *modelv1alpha1.ModelService, eppConfig EPPConfig, metadataLabels map[string]string, ) *monitoringv1.ServiceMonitor
BuildEPPServiceMonitor constructs a ServiceMonitor that scrapes metrics from the EPP Service's metrics port. When metrics endpoint auth is enabled, the endpoint is configured with bearer token authorization from the EPP SA token Secret.
func BuildHTTPRoute ¶
func BuildHTTPRoute( ms *modelv1alpha1.ModelService, metadataLabels map[string]string, ) *gatewayv1.HTTPRoute
BuildHTTPRoute constructs a typed HTTPRoute that routes traffic from a Gateway to the InferencePool backend.
The HTTPRoute uses the InferencePool as its backend reference, allowing the Gateway API Inference Extension EPP to perform intelligent model-aware routing.
func BuildInferencePool ¶
func BuildInferencePool( ms *modelv1alpha1.ModelService, metadataLabels map[string]string, ) *inferenceextv1.InferencePool
BuildInferencePool constructs a typed InferencePool resource.
The InferencePool selector matches serving pods via the common label set (without role), so both decode and prefill pods are included in the pool. The endpointPickerRef points to the EPP Service managed by this operator.
func BuildPodMonitor ¶
func BuildPodMonitor( ms *modelv1alpha1.ModelService, roleName string, selectorLabels map[string]string, metadataLabels map[string]string, ) *monitoringv1.PodMonitor
BuildPodMonitor constructs a typed PodMonitor for a serving role.
func CleanupEPPClusterRBAC ¶
CleanupEPPClusterRBAC removes the cluster-scoped RBAC resources. Called from the controller's Finalizer path.
func ConfigMapHash ¶
func ConfigMapHash(ms *modelv1alpha1.ModelService) string
ConfigMapHash returns a SHA-256 hash of the EPP ConfigMap data. When the hash changes (e.g. switching from non-PD to PD mode), the EPP Deployment's pod template annotation triggers an automatic rollout.
func DecodeName ¶
DecodeName returns the Deployment name for the decode role.
func DefaultReplicas ¶
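DefaultReplicas returns 1 if replicas is nil. The result is an int32 value, not a pointer; for a non-nil argument it presumably returns the value replicas points to.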
func EPPClusterRBACName ¶
EPPClusterRBACName returns a cluster-unique name for the EPP ClusterRole / ClusterRoleBinding. The namespace is embedded to avoid collisions when multiple ModelServices exist across namespaces. Uses sanitized EPP base.
func EPPConfigMapName ¶
EPPConfigMapName returns the EPP ConfigMap name (DNS-1035 compliant base).
func EPPLabels ¶
EPPLabels returns the full label set for EPP resources. app.kubernetes.io/name is set to the EPP Deployment name (EPPName(msName)) so it matches deployment.metadata.name.
func EPPName ¶
EPPName returns the EPP resource name (Deployment, Service, SA, Role, RoleBinding, ConfigMap). Must be DNS-1035 compliant so Role/SA/Service etc. can be created when msName contains dots.
func EPPNameForService ¶
EPPNameForService returns the EPP Service name; same as EPPName (kept for call-site clarity).
func EPPSecretName ¶
EPPSecretName returns the EPP SA token Secret name (DNS-1035 compliant base).
func EPPSelectorLabels ¶
EPPSelectorLabels returns labels used for EPP pod selection. Uses EPPName(msName) so selector matches the EPP Deployment and its Pod template labels.
func EPPServiceMonitorName ¶
EPPServiceMonitorName returns the EPP ServiceMonitor name (DNS-1035 compliant).
func EnsureDecodeDeployment ¶
func EnsureDecodeDeployment( ctx context.Context, c client.Client, scheme *runtime.Scheme, ms *modelv1alpha1.ModelService, version string, tracing TracingConfig, kitImage string, ) error
EnsureDecodeDeployment creates or updates the decode Deployment.
func EnsureDestinationRule ¶
func EnsureDestinationRule( ctx context.Context, c client.Client, scheme *runtime.Scheme, ms *modelv1alpha1.ModelService, eppConfig EPPConfig, version string, ) error
EnsureDestinationRule creates or updates the Istio DestinationRule for the EPP Service. The rule is ensured whenever the provider is Istio so that the EPP Deployment can connect to its own Service over TLS (BuildDestinationRule configures TLS SIMPLE mode, not mutual TLS), regardless of whether spec.inferencePool is set.
func EnsureEPPClusterRBAC ¶
func EnsureEPPClusterRBAC( ctx context.Context, c client.Client, ms *modelv1alpha1.ModelService, eppConfig EPPConfig, version string, ) error
EnsureEPPClusterRBAC creates or updates the cluster-scoped ClusterRole and ClusterRoleBinding for EPP metrics authentication. Since these are cluster-scoped they cannot carry an OwnerReference; the controller manages cleanup via a Finalizer on the ModelService.
func EnsureEPPConfigMap ¶
func EnsureEPPConfigMap( ctx context.Context, c client.Client, scheme *runtime.Scheme, ms *modelv1alpha1.ModelService, version string, ) error
EnsureEPPConfigMap creates or updates the EPP ConfigMap. Always ensured so that the EPP Deployment (created with or without spec.inferencePool) can run.
func EnsureEPPDeployment ¶
func EnsureEPPDeployment( ctx context.Context, c client.Client, scheme *runtime.Scheme, ms *modelv1alpha1.ModelService, eppConfig EPPConfig, defaultImages DefaultImages, version string, configHash string, ) error
EnsureEPPDeployment creates or updates the EPP Deployment. When spec.inferencePool is not set, a default InferencePool is used so the EPP Deployment is always created with the ModelService. The defaultImages parameter supplies operator-level image defaults for the synthetic EPP spec.
func EnsureEPPRBAC ¶
func EnsureEPPRBAC( ctx context.Context, c client.Client, scheme *runtime.Scheme, ms *modelv1alpha1.ModelService, version string, ) error
EnsureEPPRBAC creates or updates the EPP Role and RoleBinding. Always ensured when the EPP Deployment exists (including default EPP when InferencePool is nil) so the EPP Pod can list/watch inferencepools and pods.
func EnsureEPPSATokenSecret ¶
func EnsureEPPSATokenSecret( ctx context.Context, c client.Client, scheme *runtime.Scheme, ms *modelv1alpha1.ModelService, eppConfig EPPConfig, version string, ) error
EnsureEPPSATokenSecret creates or updates the EPP SA token Secret. The secret is only needed when metrics endpoint authentication is enabled and the provider is not GKE (GKE uses its own PodMonitoring mechanism).
func EnsureEPPService ¶
func EnsureEPPService( ctx context.Context, c client.Client, scheme *runtime.Scheme, ms *modelv1alpha1.ModelService, version string, ) error
EnsureEPPService creates or updates the EPP Service. Always ensured so it is created together with the EPP Deployment (with or without spec.inferencePool). BuildEPPService is safe when InferencePool is nil (uses default port).
Note: Reconcile runs only when ModelService.spec changes (GenerationChangedPredicate). If the operator was upgraded after the ModelService existed, trigger a reconcile by patching the spec (e.g. kubectl patch modelservice <name> -n <ns> --type=merge -p '{"spec":{"engine":{"port":8000}}}').
func EnsureEPPServiceAccount ¶
func EnsureEPPServiceAccount( ctx context.Context, c client.Client, scheme *runtime.Scheme, ms *modelv1alpha1.ModelService, version string, ) error
EnsureEPPServiceAccount creates or updates the EPP ServiceAccount. Always ensured so that the EPP Deployment (created with or without spec.inferencePool) can run.
func EnsureEPPServiceMonitor ¶
func EnsureEPPServiceMonitor( ctx context.Context, c client.Client, scheme *runtime.Scheme, ms *modelv1alpha1.ModelService, eppConfig EPPConfig, version string, ) error
EnsureEPPServiceMonitor creates or updates the EPP ServiceMonitor.
func EnsureHTTPRoute ¶
func EnsureHTTPRoute( ctx context.Context, c client.Client, scheme *runtime.Scheme, ms *modelv1alpha1.ModelService, version string, eppConfig EPPConfig, ) error
EnsureHTTPRoute creates or updates the HTTPRoute. When spec.httpRoute is set, the route is built from spec. When spec.httpRoute is nil and eppConfig.DefaultGatewayName is non-empty, a default HTTPRoute is created (attaches to that gateway, backend to the InferencePool). When both are nil/empty, any existing HTTPRoute is cleaned up.
func EnsureInferencePool ¶
func EnsureInferencePool( ctx context.Context, c client.Client, scheme *runtime.Scheme, ms *modelv1alpha1.ModelService, version string, ) error
EnsureInferencePool creates or updates the InferencePool. When spec.inferencePool is nil, a default InferencePool is created with name InferencePoolName(ms.Name) (no -epp suffix) so it matches the HTTPRoute backend and the EPP --pool-name flag.
func EnsurePodMonitors ¶
func EnsurePodMonitors( ctx context.Context, c client.Client, scheme *runtime.Scheme, ms *modelv1alpha1.ModelService, version string, ) error
EnsurePodMonitors creates or updates PodMonitors for decode (and optionally prefill).
func EnsurePrefillDeployment ¶
func EnsurePrefillDeployment( ctx context.Context, c client.Client, scheme *runtime.Scheme, ms *modelv1alpha1.ModelService, version string, tracing TracingConfig, kitImage string, ) error
EnsurePrefillDeployment creates or updates the prefill Deployment if configured.
func GPUCount ¶
func GPUCount(p modelv1alpha1.ParallelismSpec) int32
GPUCount calculates the number of GPUs required per pod from parallelism settings.
func GPUResourceName ¶
func GPUResourceName(accel modelv1alpha1.AcceleratorType) corev1.ResourceName
GPUResourceName returns the Kubernetes device plugin resource name for an accelerator.
func HTTPRouteLabels ¶
HTTPRouteLabels returns the metadata label set for the HTTPRoute resource. Uses component "epp" so the route is grouped with EPP-related resources.
func HTTPRouteName ¶
HTTPRouteName returns the HTTPRoute name.
func InferencePoolLabels ¶
InferencePoolLabels returns the metadata label set for the InferencePool resource. Uses component "epp" so the pool is grouped with EPP-related resources.
func InferencePoolName ¶
InferencePoolName returns the InferencePool name.
func InferencePoolSelectorLabels ¶
InferencePoolSelectorLabels returns the label set that InferencePool uses to select serving pods. Uses the common selector (without role) so both decode and prefill pods are included. Includes llm-d.ai/model so the pool selects only pods for this ModelService.
func InjectGPUResources ¶
func InjectGPUResources(res *corev1.ResourceRequirements, accel modelv1alpha1.AcceleratorType, count int32)
InjectGPUResources merges GPU resource limits into the given ResourceRequirements.
func LabelsForRole ¶
LabelsForRole returns the full label set for resources of a specific role.
func PluginsConfigFile ¶
func PluginsConfigFile(ms *modelv1alpha1.ModelService) string
PluginsConfigFile returns the plugins config filename based on whether Prefill/Decode disaggregation is enabled.
func PodLabelsForRole ¶
PodLabelsForRole returns labels applied to serving pods, including the llm-d role and llm-d.ai/model (ModelService name). Includes LabelInferenceServing so that spec.selector.matchLabels can select these pods. Only the Deployment's Pod template gets llm-d.ai/model; the Deployment object itself does not.
func PodMonitorName ¶
PodMonitorName returns the PodMonitor name for a role.
func PrefillName ¶
PrefillName returns the Deployment name for the prefill role.
func SanitizeDNS1035Label ¶
SanitizeDNS1035Label returns a string valid as a DNS-1035 label: lowercase alphanumeric and '-', must start with a letter, must end with an alphanumeric.
func SelectorLabelsForRole ¶
SelectorLabelsForRole returns labels used for Deployment spec.selector.matchLabels (version-independent). Includes llm-d.ai/model and llm-d.ai/role so the selector matches the Pod template labels.
Types ¶
type DefaultImages ¶
// DefaultImages groups the operator-level fallback container images.
// ApplyImageDefaults takes a value of this type and uses it to fill in
// image fields that are left empty on a ModelService.
// The flag named in each field comment is the operator CLI flag that sets it.
type DefaultImages struct {
// Engine is the default vLLM container image (--default-engine-image).
Engine string
// EPP is the default Endpoint Picker image (--default-epp-image).
EPP string
// RoutingProxy is the default routing proxy sidecar image (--default-routing-proxy-image).
RoutingProxy string
}
DefaultImages holds default container images used when the CR does not specify an image. These are set via operator CLI flags so that platform administrators can control image versions at the cluster level without baking them into the CRD schema.
type EPPConfig ¶
// EPPConfig carries the cluster-level Endpoint Picker (EPP) settings that the
// Build*/Ensure* helpers in this package accept as their eppConfig argument.
type EPPConfig struct {
// Provider is the infrastructure provider: "istio" (default), "gke", "none".
// Controls whether Istio DestinationRule or GKE-specific resources are created.
// The accepted values match the ProviderIstio, ProviderGKE and ProviderNone constants.
Provider string
// Flags are additional command-line flags passed to the EPP container.
// NOTE(review): presumably keyed by flag name with the flag value as the map
// value — confirm how entries are rendered into container args.
Flags map[string]string
// MetricsEndpointAuth controls whether the EPP metrics endpoint requires
// authentication. When false, --metrics-endpoint-auth=false is passed.
MetricsEndpointAuth bool
// DefaultGatewayName is the Gateway name used when creating an HTTPRoute
// and spec.httpRoute is not set. When non-empty, a default HTTPRoute is
// created (parentRef to this gateway, backend to the InferencePool).
// When empty, HTTPRoute is only created when spec.httpRoute is set.
DefaultGatewayName string
// Tracing holds OpenTelemetry tracing configuration.
Tracing TracingConfig
}
EPPConfig holds cluster-level EPP settings that are shared across all ModelService instances. Per-ModelService settings (image, replicas, resources, port, failureMode) live in the CRD's EndpointPickerSpec.
type ObservationResult ¶
// ObservationResult is the value returned by ObserveStatus: the ModelService
// status derived from the current Deployment states.
// NOTE(review): the per-field descriptions below are inferred from the field
// names and types only; confirm against the ObserveStatus implementation.
type ObservationResult struct {
// Phase is the derived ModelService phase.
Phase modelv1alpha1.ModelServicePhase
// DecodeReady is presumably the number of ready decode replicas — TODO confirm.
DecodeReady int32
// DecodeReplicas is presumably the decode replica count that DecodeReady is
// compared against — TODO confirm whether this is the desired or observed count.
DecodeReplicas int32
// PrefillReady is presumably the number of ready prefill replicas — TODO confirm.
PrefillReady int32
// PrefillReplicas is presumably the prefill replica count that PrefillReady is
// compared against — TODO confirm whether this is the desired or observed count.
PrefillReplicas int32
// Ready is a condition status; presumably the status of the "Ready"
// condition (ConditionTypeReady) — TODO confirm.
Ready metav1.ConditionStatus
// Reason is presumably the machine-readable reason accompanying Ready — TODO confirm.
Reason string
// Message is presumably the human-readable detail accompanying Ready — TODO confirm.
Message string
}
ObservationResult holds the derived status from observing the current cluster state.
func ObserveStatus ¶
func ObserveStatus( ctx context.Context, c client.Client, ms *modelv1alpha1.ModelService, ) (ObservationResult, error)
ObserveStatus reads the current Deployment states and derives the ModelService status.