Documentation
¶
Index ¶
- Constants
- func BuildDeployment(ms *modelv1alpha1.ModelService, role *modelv1alpha1.RoleSpec, roleName string, ...) *appsv1.Deployment
- func BuildDestinationRule(ms *modelv1alpha1.ModelService, metadataLabels map[string]string) *istionetworkingv1beta1.DestinationRule
- func BuildEPPClusterRBAC(ms *modelv1alpha1.ModelService, metadataLabels map[string]string) (*rbacv1.ClusterRole, *rbacv1.ClusterRoleBinding)
- func BuildEPPConfigMap(ms *modelv1alpha1.ModelService, metadataLabels map[string]string) *corev1.ConfigMap
- func BuildEPPDeployment(ms *modelv1alpha1.ModelService, eppConfig EPPConfig, ...) *appsv1.Deployment
- func BuildEPPRBAC(ms *modelv1alpha1.ModelService, metadataLabels map[string]string, ...) (*rbacv1.Role, *rbacv1.RoleBinding)
- func BuildEPPSATokenSecret(ms *modelv1alpha1.ModelService, metadataLabels map[string]string) *corev1.Secret
- func BuildEPPService(ms *modelv1alpha1.ModelService, metadataLabels map[string]string, ...) *corev1.Service
- func BuildEPPServiceAccount(ms *modelv1alpha1.ModelService, metadataLabels map[string]string) *corev1.ServiceAccount
- func BuildEPPServiceMonitor(ms *modelv1alpha1.ModelService, eppConfig EPPConfig, ...) *monitoringv1.ServiceMonitor
- func BuildHTTPRoute(ms *modelv1alpha1.ModelService, metadataLabels map[string]string) *gatewayv1.HTTPRoute
- func BuildInferencePool(ms *modelv1alpha1.ModelService, metadataLabels map[string]string) *inferenceextv1.InferencePool
- func BuildPodMonitor(ms *modelv1alpha1.ModelService, roleName string, ...) *monitoringv1.PodMonitor
- func CleanupEPPClusterRBAC(ctx context.Context, c client.Client, namespace, msName string) error
- func ConfigMapHash(ms *modelv1alpha1.ModelService) string
- func DecodeName(msName string) string
- func DefaultReplicas(replicas *int32) int32
- func EPPClusterRBACName(namespace, msName string) string
- func EPPConfigMapName(msName string) string
- func EPPLabels(msName, version string) map[string]string
- func EPPName(msName string) string
- func EPPSecretName(msName string) string
- func EPPSelectorLabels(msName string) map[string]string
- func EPPServiceMonitorName(msName string) string
- func EnsureDecodeDeployment(ctx context.Context, c client.Client, scheme *runtime.Scheme, ...) error
- func EnsureDestinationRule(ctx context.Context, c client.Client, scheme *runtime.Scheme, ...) error
- func EnsureEPPClusterRBAC(ctx context.Context, c client.Client, ms *modelv1alpha1.ModelService, ...) error
- func EnsureEPPConfigMap(ctx context.Context, c client.Client, scheme *runtime.Scheme, ...) error
- func EnsureEPPDeployment(ctx context.Context, c client.Client, scheme *runtime.Scheme, ...) error
- func EnsureEPPRBAC(ctx context.Context, c client.Client, scheme *runtime.Scheme, ...) error
- func EnsureEPPSATokenSecret(ctx context.Context, c client.Client, scheme *runtime.Scheme, ...) error
- func EnsureEPPService(ctx context.Context, c client.Client, scheme *runtime.Scheme, ...) error
- func EnsureEPPServiceAccount(ctx context.Context, c client.Client, scheme *runtime.Scheme, ...) error
- func EnsureEPPServiceMonitor(ctx context.Context, c client.Client, scheme *runtime.Scheme, ...) error
- func EnsureHTTPRoute(ctx context.Context, c client.Client, scheme *runtime.Scheme, ...) error
- func EnsureInferencePool(ctx context.Context, c client.Client, scheme *runtime.Scheme, ...) error
- func EnsurePodMonitors(ctx context.Context, c client.Client, scheme *runtime.Scheme, ...) error
- func EnsurePrefillDeployment(ctx context.Context, c client.Client, scheme *runtime.Scheme, ...) error
- func GPUCount(p modelv1alpha1.ParallelismSpec) int32
- func GPUResourceName(accel modelv1alpha1.AcceleratorType) corev1.ResourceName
- func HTTPRouteName(msName string) string
- func InferencePoolName(msName string) string
- func InferencePoolSelectorLabels(msName string) map[string]string
- func InjectGPUResources(res *corev1.ResourceRequirements, accel modelv1alpha1.AcceleratorType, ...)
- func LabelsForRole(msName, component, version string) map[string]string
- func PluginsConfigFile(ms *modelv1alpha1.ModelService) string
- func PodLabelsForRole(msName, component, version, role string) map[string]string
- func PodMonitorName(msName, role string) string
- func PrefillName(msName string) string
- func SelectorLabelsForRole(msName, component string) map[string]string
- type EPPConfig
- type ObservationResult
- type TracingConfig
Constants ¶
const ( ProviderIstio = "istio" ProviderGKE = "gke" ProviderNone = "none" )
const ( DefaultPluginsConfigFile = "default-plugins.yaml" PDPluginsConfigFile = "pd-config.yaml" )
const ( ConditionTypeReady = "Ready" ComponentDecode = "model-decode" ComponentPrefill = "model-prefill" ComponentEPP = "epp" ModelVolumeName = "model" LabelRole = "llm-d.ai/role" LabelInferenceServer = "llm-d.ai/inference-serving" LabelValueTrue = "true" RoleDecode = "decode" RolePrefill = "prefill" FinalizerClusterRBAC = "model.otterscale.io/epp-cluster-rbac" )
Variables ¶
This section is empty.
Functions ¶
func BuildDeployment ¶
func BuildDeployment( ms *modelv1alpha1.ModelService, role *modelv1alpha1.RoleSpec, roleName string, deployName string, podLabels map[string]string, metadataLabels map[string]string, selectorLabels map[string]string, tracing TracingConfig, ) *appsv1.Deployment
BuildDeployment constructs an apps/v1 Deployment for a serving role (decode or prefill).
The Deployment uses a Kubernetes image volume (K8s >= 1.35) to mount the OCI model artifact directly — no init containers or PVC provisioning required. GPU resources are injected automatically based on accelerator type and parallelism.
func BuildDestinationRule ¶
func BuildDestinationRule( ms *modelv1alpha1.ModelService, metadataLabels map[string]string, ) *istionetworkingv1beta1.DestinationRule
BuildDestinationRule constructs an Istio DestinationRule for the EPP Service.
The rule enables TLS SIMPLE mode with insecureSkipVerify so the Istio sidecar can originate a (one-way) TLS connection to the EPP service without verifying its certificate, and therefore without needing a custom CA cert.
func BuildEPPClusterRBAC ¶
func BuildEPPClusterRBAC( ms *modelv1alpha1.ModelService, metadataLabels map[string]string, ) (*rbacv1.ClusterRole, *rbacv1.ClusterRoleBinding)
BuildEPPClusterRBAC constructs a ClusterRole and ClusterRoleBinding that grant the EPP ServiceAccount permissions for metrics authentication:
- tokenreviews (authenticate bearer tokens from Prometheus)
- subjectaccessreviews (authorise metrics scraping)
- /metrics non-resource URL access
These are cluster-scoped and cannot carry an OwnerReference to the namespace-scoped ModelService; the controller uses a Finalizer to clean them up.
func BuildEPPConfigMap ¶
func BuildEPPConfigMap( ms *modelv1alpha1.ModelService, metadataLabels map[string]string, ) *corev1.ConfigMap
BuildEPPConfigMap constructs the ConfigMap for the EPP plugins configuration.
Both modes (with and without prefill/decode disaggregation) produce an explicit config file so the operator fully controls the scheduling behaviour regardless of what the EPP image ships as built-in defaults.
func BuildEPPDeployment ¶
func BuildEPPDeployment( ms *modelv1alpha1.ModelService, eppConfig EPPConfig, metadataLabels map[string]string, selectorLabels map[string]string, configHash string, ) *appsv1.Deployment
BuildEPPDeployment constructs the EPP Deployment.
func BuildEPPRBAC ¶
func BuildEPPRBAC( ms *modelv1alpha1.ModelService, metadataLabels map[string]string, replicas int32, ) (*rbacv1.Role, *rbacv1.RoleBinding)
BuildEPPRBAC constructs the Role and RoleBinding for the EPP ServiceAccount.
The EPP needs:
- Read access to pods (endpoint selection)
- Read access to InferencePool (GA API)
- Read access to InferenceObjective / InferenceModelRewrite (GAIE v1alpha2)
- When replicas > 1: leases + events for leader election
func BuildEPPSATokenSecret ¶
func BuildEPPSATokenSecret( ms *modelv1alpha1.ModelService, metadataLabels map[string]string, ) *corev1.Secret
BuildEPPSATokenSecret constructs a ServiceAccountToken Secret for the EPP. This token is used by Prometheus to authenticate when scraping metrics.
func BuildEPPService ¶
func BuildEPPService( ms *modelv1alpha1.ModelService, metadataLabels map[string]string, selectorLabels map[string]string, ) *corev1.Service
BuildEPPService constructs the Service exposing the EPP's extProc and metrics ports.
func BuildEPPServiceAccount ¶
func BuildEPPServiceAccount( ms *modelv1alpha1.ModelService, metadataLabels map[string]string, ) *corev1.ServiceAccount
BuildEPPServiceAccount constructs the ServiceAccount for the EPP.
func BuildEPPServiceMonitor ¶
func BuildEPPServiceMonitor( ms *modelv1alpha1.ModelService, eppConfig EPPConfig, metadataLabels map[string]string, ) *monitoringv1.ServiceMonitor
BuildEPPServiceMonitor constructs a ServiceMonitor that scrapes metrics from the EPP Service's metrics port. When metrics endpoint auth is enabled, the endpoint is configured with bearer token authorization from the EPP SA token Secret.
func BuildHTTPRoute ¶
func BuildHTTPRoute( ms *modelv1alpha1.ModelService, metadataLabels map[string]string, ) *gatewayv1.HTTPRoute
BuildHTTPRoute constructs a typed HTTPRoute that routes traffic from a Gateway to the InferencePool backend.
The HTTPRoute uses the InferencePool as its backend reference, allowing the Gateway API Inference Extension EPP to perform intelligent model-aware routing.
func BuildInferencePool ¶
func BuildInferencePool( ms *modelv1alpha1.ModelService, metadataLabels map[string]string, ) *inferenceextv1.InferencePool
BuildInferencePool constructs a typed InferencePool resource.
The InferencePool selector matches serving pods via the common label set (without role), so both decode and prefill pods are included in the pool. The endpointPickerRef points to the EPP Service managed by this operator.
func BuildPodMonitor ¶
func BuildPodMonitor( ms *modelv1alpha1.ModelService, roleName string, selectorLabels map[string]string, metadataLabels map[string]string, ) *monitoringv1.PodMonitor
BuildPodMonitor constructs a typed PodMonitor for a serving role.
func CleanupEPPClusterRBAC ¶
CleanupEPPClusterRBAC removes the cluster-scoped RBAC resources. Called from the controller's Finalizer path.
func ConfigMapHash ¶
func ConfigMapHash(ms *modelv1alpha1.ModelService) string
ConfigMapHash returns a SHA-256 hash of the EPP ConfigMap data. The hash is recorded in an annotation on the EPP Deployment's pod template, so when it changes (e.g. switching from non-PD to PD mode) the Deployment automatically rolls out new pods.
func DecodeName ¶
DecodeName returns the Deployment name for the decode role.
func DefaultReplicas ¶
DefaultReplicas returns the value pointed to by replicas, or 1 if replicas is nil.
func EPPClusterRBACName ¶
EPPClusterRBACName returns a cluster-unique name for the EPP ClusterRole / ClusterRoleBinding. The namespace is embedded to avoid collisions when multiple ModelServices exist across namespaces.
func EPPConfigMapName ¶
EPPConfigMapName returns the EPP ConfigMap name.
func EPPSecretName ¶
EPPSecretName returns the EPP SA token Secret name.
func EPPSelectorLabels ¶
EPPSelectorLabels returns labels used for EPP pod selection (version-independent).
func EPPServiceMonitorName ¶
EPPServiceMonitorName returns the EPP ServiceMonitor name.
func EnsureDecodeDeployment ¶
func EnsureDecodeDeployment( ctx context.Context, c client.Client, scheme *runtime.Scheme, ms *modelv1alpha1.ModelService, version string, tracing TracingConfig, ) error
EnsureDecodeDeployment creates or updates the decode Deployment.
func EnsureDestinationRule ¶
func EnsureDestinationRule( ctx context.Context, c client.Client, scheme *runtime.Scheme, ms *modelv1alpha1.ModelService, eppConfig EPPConfig, version string, ) error
EnsureDestinationRule creates or updates the Istio DestinationRule.
func EnsureEPPClusterRBAC ¶
func EnsureEPPClusterRBAC( ctx context.Context, c client.Client, ms *modelv1alpha1.ModelService, eppConfig EPPConfig, version string, ) error
EnsureEPPClusterRBAC creates or updates the cluster-scoped ClusterRole and ClusterRoleBinding for EPP metrics authentication. Since these are cluster-scoped they cannot carry an OwnerReference; the controller manages cleanup via a Finalizer on the ModelService.
func EnsureEPPConfigMap ¶
func EnsureEPPConfigMap( ctx context.Context, c client.Client, scheme *runtime.Scheme, ms *modelv1alpha1.ModelService, version string, ) error
EnsureEPPConfigMap creates or updates the EPP ConfigMap.
func EnsureEPPDeployment ¶
func EnsureEPPDeployment( ctx context.Context, c client.Client, scheme *runtime.Scheme, ms *modelv1alpha1.ModelService, eppConfig EPPConfig, version string, configHash string, ) error
EnsureEPPDeployment creates or updates the EPP Deployment.
func EnsureEPPRBAC ¶
func EnsureEPPRBAC( ctx context.Context, c client.Client, scheme *runtime.Scheme, ms *modelv1alpha1.ModelService, version string, ) error
EnsureEPPRBAC creates or updates the EPP Role and RoleBinding.
func EnsureEPPSATokenSecret ¶
func EnsureEPPSATokenSecret( ctx context.Context, c client.Client, scheme *runtime.Scheme, ms *modelv1alpha1.ModelService, eppConfig EPPConfig, version string, ) error
EnsureEPPSATokenSecret creates or updates the EPP SA token Secret. The secret is only needed when metrics endpoint authentication is enabled and the provider is not GKE (GKE uses its own PodMonitoring mechanism).
func EnsureEPPService ¶
func EnsureEPPService( ctx context.Context, c client.Client, scheme *runtime.Scheme, ms *modelv1alpha1.ModelService, version string, ) error
EnsureEPPService creates or updates the EPP Service.
func EnsureEPPServiceAccount ¶
func EnsureEPPServiceAccount( ctx context.Context, c client.Client, scheme *runtime.Scheme, ms *modelv1alpha1.ModelService, version string, ) error
EnsureEPPServiceAccount creates or updates the EPP ServiceAccount.
func EnsureEPPServiceMonitor ¶
func EnsureEPPServiceMonitor( ctx context.Context, c client.Client, scheme *runtime.Scheme, ms *modelv1alpha1.ModelService, eppConfig EPPConfig, version string, ) error
EnsureEPPServiceMonitor creates or updates the EPP ServiceMonitor.
func EnsureHTTPRoute ¶
func EnsureHTTPRoute( ctx context.Context, c client.Client, scheme *runtime.Scheme, ms *modelv1alpha1.ModelService, version string, ) error
EnsureHTTPRoute creates or updates the HTTPRoute if configured.
func EnsureInferencePool ¶
func EnsureInferencePool( ctx context.Context, c client.Client, scheme *runtime.Scheme, ms *modelv1alpha1.ModelService, version string, ) error
EnsureInferencePool creates or updates the InferencePool if configured.
func EnsurePodMonitors ¶
func EnsurePodMonitors( ctx context.Context, c client.Client, scheme *runtime.Scheme, ms *modelv1alpha1.ModelService, version string, ) error
EnsurePodMonitors creates or updates PodMonitors for decode (and optionally prefill).
func EnsurePrefillDeployment ¶
func EnsurePrefillDeployment( ctx context.Context, c client.Client, scheme *runtime.Scheme, ms *modelv1alpha1.ModelService, version string, tracing TracingConfig, ) error
EnsurePrefillDeployment creates or updates the prefill Deployment if configured.
func GPUCount ¶
func GPUCount(p modelv1alpha1.ParallelismSpec) int32
GPUCount calculates the number of GPUs required per pod from parallelism settings.
func GPUResourceName ¶
func GPUResourceName(accel modelv1alpha1.AcceleratorType) corev1.ResourceName
GPUResourceName returns the Kubernetes device plugin resource name for an accelerator.
func HTTPRouteName ¶
HTTPRouteName returns the HTTPRoute name.
func InferencePoolName ¶
InferencePoolName returns the InferencePool name.
func InferencePoolSelectorLabels ¶
InferencePoolSelectorLabels returns the label set that InferencePool uses to select serving pods. Uses the common selector (without role) so both decode and prefill pods are included.
func InjectGPUResources ¶
func InjectGPUResources(res *corev1.ResourceRequirements, accel modelv1alpha1.AcceleratorType, count int32)
InjectGPUResources merges GPU resource limits into the given ResourceRequirements.
func LabelsForRole ¶
LabelsForRole returns the full label set for resources of a specific role.
func PluginsConfigFile ¶
func PluginsConfigFile(ms *modelv1alpha1.ModelService) string
PluginsConfigFile returns the plugins config filename based on whether Prefill/Decode disaggregation is enabled.
func PodLabelsForRole ¶
PodLabelsForRole returns labels applied to serving pods, including the llm-d role.
func PodMonitorName ¶
PodMonitorName returns the PodMonitor name for a role.
func PrefillName ¶
PrefillName returns the Deployment name for the prefill role.
func SelectorLabelsForRole ¶
SelectorLabelsForRole returns labels used for list/match queries (version-independent).
Types ¶
type EPPConfig ¶
type EPPConfig struct {
// Provider is the infrastructure provider: "istio" (default), "gke", "none".
// Controls whether Istio DestinationRule or GKE-specific resources are created.
Provider string
// Flags are additional command-line flags passed to the EPP container.
Flags map[string]string
// MetricsEndpointAuth controls whether the EPP metrics endpoint requires
// authentication. When false, --metrics-endpoint-auth=false is passed.
MetricsEndpointAuth bool
// Tracing holds OpenTelemetry tracing configuration.
Tracing TracingConfig
}
EPPConfig holds cluster-level EPP settings that are shared across all ModelService instances. Per-ModelService settings (image, replicas, resources, port, failureMode) live in the CRD's EndpointPickerSpec.
type ObservationResult ¶
type ObservationResult struct {
Phase modelv1alpha1.ModelServicePhase
DecodeReady int32
DecodeReplicas int32
PrefillReady int32
PrefillReplicas int32
Ready metav1.ConditionStatus
Reason string
Message string
}
ObservationResult holds the derived status from observing the current cluster state.
func ObserveStatus ¶
func ObserveStatus( ctx context.Context, c client.Client, ms *modelv1alpha1.ModelService, ) (ObservationResult, error)
ObserveStatus reads the current Deployment states and derives the ModelService status.