modelservice

package

Version: v0.9.0-alpha.2 (latest) | Published: Mar 17, 2026 | License: Apache-2.0 | Imports: 29 | Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/otterscale/model-operator

Links

Open Source Insights

Documentation ¶

Index ¶

Constants
func ApplyImageDefaults(ms *modelv1alpha1.ModelService, defaults DefaultImages)
func BuildDefaultHTTPRoute(ms *modelv1alpha1.ModelService, metadataLabels map[string]string, ...) *gatewayv1.HTTPRoute
func BuildDefaultInferencePool(ms *modelv1alpha1.ModelService, metadataLabels map[string]string) *inferenceextv1.InferencePool
func BuildDeployment(ms *modelv1alpha1.ModelService, role *modelv1alpha1.RoleSpec, roleName string, ...) *appsv1.Deployment
func BuildDestinationRule(ms *modelv1alpha1.ModelService, metadataLabels map[string]string) *istionetworkingv1beta1.DestinationRule
func BuildEPPClusterRBAC(ms *modelv1alpha1.ModelService, metadataLabels map[string]string) (*rbacv1.ClusterRole, *rbacv1.ClusterRoleBinding)
func BuildEPPConfigMap(ms *modelv1alpha1.ModelService, metadataLabels map[string]string) *corev1.ConfigMap
func BuildEPPDeployment(ms *modelv1alpha1.ModelService, eppConfig EPPConfig, ...) *appsv1.Deployment
func BuildEPPRBAC(ms *modelv1alpha1.ModelService, metadataLabels map[string]string, ...) (*rbacv1.Role, *rbacv1.RoleBinding)
func BuildEPPSATokenSecret(ms *modelv1alpha1.ModelService, metadataLabels map[string]string) *corev1.Secret
func BuildEPPService(ms *modelv1alpha1.ModelService, metadataLabels map[string]string, ...) *corev1.Service
func BuildEPPServiceAccount(ms *modelv1alpha1.ModelService, metadataLabels map[string]string) *corev1.ServiceAccount
func BuildEPPServiceMonitor(ms *modelv1alpha1.ModelService, eppConfig EPPConfig, ...) *monitoringv1.ServiceMonitor
func BuildHTTPRoute(ms *modelv1alpha1.ModelService, metadataLabels map[string]string) *gatewayv1.HTTPRoute
func BuildInferencePool(ms *modelv1alpha1.ModelService, metadataLabels map[string]string) *inferenceextv1.InferencePool
func BuildPodMonitor(ms *modelv1alpha1.ModelService, roleName string, ...) *monitoringv1.PodMonitor
func CleanupEPPClusterRBAC(ctx context.Context, c client.Client, namespace, msName string) error
func ConfigMapHash(ms *modelv1alpha1.ModelService) string
func DecodeName(msName string) string
func DefaultReplicas(replicas *int32) int32
func EPPClusterRBACName(namespace, msName string) string
func EPPConfigMapName(msName string) string
func EPPLabels(msName, version string) map[string]string
func EPPName(msName string) string
func EPPNameForService(msName string) string
func EPPSecretName(msName string) string
func EPPSelectorLabels(msName string) map[string]string
func EPPServiceMonitorName(msName string) string
func EnsureDecodeDeployment(ctx context.Context, c client.Client, scheme *runtime.Scheme, ...) error
func EnsureDestinationRule(ctx context.Context, c client.Client, scheme *runtime.Scheme, ...) error
func EnsureEPPClusterRBAC(ctx context.Context, c client.Client, ms *modelv1alpha1.ModelService, ...) error
func EnsureEPPConfigMap(ctx context.Context, c client.Client, scheme *runtime.Scheme, ...) error
func EnsureEPPDeployment(ctx context.Context, c client.Client, scheme *runtime.Scheme, ...) error
func EnsureEPPRBAC(ctx context.Context, c client.Client, scheme *runtime.Scheme, ...) error
func EnsureEPPSATokenSecret(ctx context.Context, c client.Client, scheme *runtime.Scheme, ...) error
func EnsureEPPService(ctx context.Context, c client.Client, scheme *runtime.Scheme, ...) error
func EnsureEPPServiceAccount(ctx context.Context, c client.Client, scheme *runtime.Scheme, ...) error
func EnsureEPPServiceMonitor(ctx context.Context, c client.Client, scheme *runtime.Scheme, ...) error
func EnsureHTTPRoute(ctx context.Context, c client.Client, scheme *runtime.Scheme, ...) error
func EnsureInferencePool(ctx context.Context, c client.Client, scheme *runtime.Scheme, ...) error
func EnsurePodMonitors(ctx context.Context, c client.Client, scheme *runtime.Scheme, ...) error
func EnsurePrefillDeployment(ctx context.Context, c client.Client, scheme *runtime.Scheme, ...) error
func GPUCount(p modelv1alpha1.ParallelismSpec) int32
func GPUResourceName(accel modelv1alpha1.AcceleratorType) corev1.ResourceName
func HTTPRouteLabels(msName, version string) map[string]string
func HTTPRouteName(msName string) string
func InferencePoolLabels(msName, version string) map[string]string
func InferencePoolName(msName string) string
func InferencePoolSelectorLabels(msName string) map[string]string
func InjectGPUResources(res *corev1.ResourceRequirements, accel modelv1alpha1.AcceleratorType, ...)
func LabelsForRole(msName, component, version string) map[string]string
func PluginsConfigFile(ms *modelv1alpha1.ModelService) string
func PodLabelsForRole(msName, component, version, role string) map[string]string
func PodMonitorName(msName, role string) string
func PrefillName(msName string) string
func SanitizeDNS1035Label(s string) string
func SelectorLabelsForRole(msName, component, role string) map[string]string
type DefaultImages
type EPPConfig
type ObservationResult
- func ObserveStatus(ctx context.Context, c client.Client, ms *modelv1alpha1.ModelService) (ObservationResult, error)
type TracingConfig

Constants ¶

View Source

const (

	// ModelTmpVolumeName is the name of the emptyDir volume used by the model-unpack
	// init container for OCI pull cache (kit cache / scratch).
	ModelTmpVolumeName = "model-tmp"
	// ModelTmpMountPath is the mount path for the OCI cache inside the init container.
	// KITOPS_HOME is set to this path so kit stores its cache here.
	ModelTmpMountPath = "/tmp/model-oci"
)

View Source

const (
	ProviderIstio = "istio"
	ProviderGKE   = "gke"
	ProviderNone  = "none"
)

View Source

const (
	DefaultPluginsConfigFile = "default-plugins.yaml"
	PDPluginsConfigFile      = "pd-config.yaml"
)

View Source

const (
	ConditionTypeReady = "Ready"

	ComponentDecode  = "model-decode"
	ComponentPrefill = "model-prefill"
	ComponentEPP     = "epp"

	ModelVolumeName = "model"

	// DockerConfigVolumeName is the name of the volume that mounts OCI registry credentials
	// (when spec.model.imagePullSecrets is set). Aligned with modelartifact for consistency.
	DockerConfigVolumeName = "docker-config"
	// DockerConfigMountPath is the mount path for the Docker config inside the Pod (config at config.json).
	DockerConfigMountPath = "/.docker"

	LabelRole             = "llm-d.ai/role"
	LabelInferenceServer  = "llm-d.ai/inference-serving"
	LabelInferenceServing = "llm-d.ai/inferenceServing"
	LabelModel            = "llm-d.ai/model"
	// LabelInferencePool is the Pod label key for the EPP deployment name (value = EPPName(msName)).
	LabelInferencePool = "inferencepool"

	LabelValueTrue = "true"

	RoleDecode  = "decode"
	RolePrefill = "prefill"

	FinalizerClusterRBAC = "model.otterscale.io/epp-cluster-rbac"
)

View Source

const DefaultEPPExtProcPort = int32(9002)

DefaultEPPExtProcPort is the default gRPC extProc port when InferencePool is not set.

View Source

const DefaultGatewayGroup = "gateway.networking.k8s.io"

DefaultGatewayGroup is the API group for Gateway parentRef in HTTPRoute.

BuildDefaultHTTPRoute constructs an HTTPRoute when spec.httpRoute is not set. It attaches to the given gateway and routes to the InferencePool named poolName (EPPName(ms.Name) for default pool, or InferencePoolName(ms.Name) for explicit).

View Source

const DefaultGatewayKind = "Gateway"

DefaultGatewayKind is the kind for Gateway parentRef in HTTPRoute.

View Source

const DefaultGatewayNamespace = "llm-d"

DefaultGatewayNamespace is the namespace of the Gateway in default HTTPRoute parentRefs.

View Source

const HeaderOtterScaleModelName = "OtterScale-Model-Name"

HeaderOtterScaleModelName is the HTTP header name used to match the model (value = ModelService name).

View Source

const PoolGroupAPIGroup = "inference.networking.k8s.io"

PoolGroupAPIGroup is the value for the EPP --pool-group argument (InferencePool API group).

Variables ¶

This section is empty.

Functions ¶

func ApplyImageDefaults ¶

func ApplyImageDefaults(ms *modelv1alpha1.ModelService, defaults DefaultImages)

ApplyImageDefaults fills in empty image fields on the ModelService with the operator-level defaults. The caller should pass a DeepCopy of the CR to avoid mutating the original API object.

func BuildDefaultHTTPRoute ¶

func BuildDefaultHTTPRoute(
	ms *modelv1alpha1.ModelService,
	metadataLabels map[string]string,
	gatewayName string,
	poolName string,
) *gatewayv1.HTTPRoute

func BuildDefaultInferencePool ¶

func BuildDefaultInferencePool(
	ms *modelv1alpha1.ModelService,
	metadataLabels map[string]string,
) *inferenceextv1.InferencePool

BuildDefaultInferencePool constructs a default InferencePool when ms.Spec.InferencePool is nil. The pool name is InferencePoolName(ms.Name) (no -epp suffix) so it matches the default HTTPRoute backendRef.name and the EPP --pool-name flag.

func BuildDeployment ¶

func BuildDeployment(
	ms *modelv1alpha1.ModelService,
	role *modelv1alpha1.RoleSpec,
	roleName string,
	deployName string,
	podLabels map[string]string,
	metadataLabels map[string]string,
	selectorLabels map[string]string,
	tracing TracingConfig,
	kitImage string,
) *appsv1.Deployment

BuildDeployment constructs an apps/v1 Deployment for a serving role (decode or prefill).

The model artifact is provisioned by an init container that pulls the OCI ModelPack from spec.model.image into a tmp emptyDir and runs "kit unpack" into spec.model.mountPath. The main vLLM container mounts the unpacked model read-only. GPU resources are injected automatically based on accelerator type and parallelism.

func BuildDestinationRule ¶

func BuildDestinationRule(
	ms *modelv1alpha1.ModelService,
	metadataLabels map[string]string,
) *istionetworkingv1beta1.DestinationRule

BuildDestinationRule constructs an Istio DestinationRule for the EPP Service.

The rule enables TLS SIMPLE mode with insecureSkipVerify so the Istio sidecar can communicate with the EPP service over TLS without needing a custom CA cert. (SIMPLE mode originates one-way TLS; it is not mutual TLS.)

func BuildEPPClusterRBAC ¶

func BuildEPPClusterRBAC(
	ms *modelv1alpha1.ModelService,
	metadataLabels map[string]string,
) (*rbacv1.ClusterRole, *rbacv1.ClusterRoleBinding)

BuildEPPClusterRBAC constructs a ClusterRole and ClusterRoleBinding that grant the EPP ServiceAccount permissions for metrics authentication:

- tokenreviews (authenticate bearer tokens from Prometheus)
- subjectaccessreviews (authorise metrics scraping)
- /metrics non-resource URL access

These are cluster-scoped and cannot carry an OwnerReference to the namespace-scoped ModelService; the controller uses a Finalizer to clean them up.

func BuildEPPConfigMap ¶

func BuildEPPConfigMap(
	ms *modelv1alpha1.ModelService,
	metadataLabels map[string]string,
) *corev1.ConfigMap

BuildEPPConfigMap constructs the ConfigMap for the EPP plugins configuration.

Both modes produce an explicit config file so the operator fully controls the scheduling behaviour regardless of what the EPP image ships as built-in defaults.

func BuildEPPDeployment ¶

func BuildEPPDeployment(
	ms *modelv1alpha1.ModelService,
	eppConfig EPPConfig,
	metadataLabels map[string]string,
	selectorLabels map[string]string,
	configHash string,
) *appsv1.Deployment

BuildEPPDeployment constructs the EPP Deployment.

func BuildEPPRBAC ¶

func BuildEPPRBAC(
	ms *modelv1alpha1.ModelService,
	metadataLabels map[string]string,
	replicas int32,
) (*rbacv1.Role, *rbacv1.RoleBinding)

BuildEPPRBAC constructs the Role and RoleBinding for the EPP ServiceAccount.

The EPP needs:

- Read access to pods (endpoint selection)
- Read access to InferencePool (GA API)
- Read access to InferenceObjective / InferenceModelRewrite (GAIE v1alpha2)
- When replicas > 1: leases + events for leader election

func BuildEPPSATokenSecret ¶

func BuildEPPSATokenSecret(
	ms *modelv1alpha1.ModelService,
	metadataLabels map[string]string,
) *corev1.Secret

BuildEPPSATokenSecret constructs a ServiceAccountToken Secret for the EPP. This token is used by Prometheus to authenticate when scraping metrics.

func BuildEPPService ¶

func BuildEPPService(
	ms *modelv1alpha1.ModelService,
	metadataLabels map[string]string,
	selectorLabels map[string]string,
) *corev1.Service

BuildEPPService constructs the Service exposing the EPP's extProc and metrics ports. Safe to call when ms.Spec.InferencePool is nil (uses default port).

func BuildEPPServiceAccount ¶

func BuildEPPServiceAccount(
	ms *modelv1alpha1.ModelService,
	metadataLabels map[string]string,
) *corev1.ServiceAccount

BuildEPPServiceAccount constructs the ServiceAccount for the EPP.

func BuildEPPServiceMonitor ¶

func BuildEPPServiceMonitor(
	ms *modelv1alpha1.ModelService,
	eppConfig EPPConfig,
	metadataLabels map[string]string,
) *monitoringv1.ServiceMonitor

BuildEPPServiceMonitor constructs a ServiceMonitor that scrapes metrics from the EPP Service's metrics port. When metrics endpoint auth is enabled, the endpoint is configured with bearer token authorization from the EPP SA token Secret.

func BuildHTTPRoute ¶

func BuildHTTPRoute(
	ms *modelv1alpha1.ModelService,
	metadataLabels map[string]string,
) *gatewayv1.HTTPRoute

BuildHTTPRoute constructs a typed HTTPRoute that routes traffic from a Gateway to the InferencePool backend.

The HTTPRoute uses the InferencePool as its backend reference, allowing the Gateway API Inference Extension EPP to perform intelligent model-aware routing.

func BuildInferencePool ¶

func BuildInferencePool(
	ms *modelv1alpha1.ModelService,
	metadataLabels map[string]string,
) *inferenceextv1.InferencePool

BuildInferencePool constructs a typed InferencePool resource.

The InferencePool selector matches serving pods via the common label set (without role), so both decode and prefill pods are included in the pool. The endpointPickerRef points to the EPP Service managed by this operator.

func BuildPodMonitor ¶

func BuildPodMonitor(
	ms *modelv1alpha1.ModelService,
	roleName string,
	selectorLabels map[string]string,
	metadataLabels map[string]string,
) *monitoringv1.PodMonitor

BuildPodMonitor constructs a typed PodMonitor for a serving role.

func CleanupEPPClusterRBAC ¶

func CleanupEPPClusterRBAC(ctx context.Context, c client.Client, namespace, msName string) error

CleanupEPPClusterRBAC removes the cluster-scoped RBAC resources. Called from the controller's Finalizer path.

func ConfigMapHash ¶

func ConfigMapHash(ms *modelv1alpha1.ModelService) string

ConfigMapHash returns a SHA-256 hash of the EPP ConfigMap data. When the hash changes (e.g. switching from non-PD to PD mode), the EPP Deployment's pod template annotation triggers an automatic rollout.

func DecodeName ¶

func DecodeName(msName string) string

DecodeName returns the Deployment name for the decode role.

func DefaultReplicas ¶

func DefaultReplicas(replicas *int32) int32

DefaultReplicas returns 1 if replicas is nil. The result is an int32 value, not a pointer.

func EPPClusterRBACName ¶

func EPPClusterRBACName(namespace, msName string) string

EPPClusterRBACName returns a cluster-unique name for the EPP ClusterRole / ClusterRoleBinding. The namespace is embedded to avoid collisions when multiple ModelServices exist across namespaces. Uses sanitized EPP base.

func EPPConfigMapName ¶

func EPPConfigMapName(msName string) string

EPPConfigMapName returns the EPP ConfigMap name (DNS-1035 compliant base).

func EPPLabels ¶

func EPPLabels(msName, version string) map[string]string

EPPLabels returns the full label set for EPP resources. app.kubernetes.io/name is set to the EPP Deployment name (EPPName(msName)) so it matches deployment.metadata.name.

func EPPName ¶

func EPPName(msName string) string

EPPName returns the EPP resource name (Deployment, Service, SA, Role, RoleBinding, ConfigMap). Must be DNS-1035 compliant so Role/SA/Service etc. can be created when msName contains dots.

func EPPNameForService ¶

func EPPNameForService(msName string) string

EPPNameForService returns the EPP Service name; same as EPPName (kept for call-site clarity).

func EPPSecretName ¶

func EPPSecretName(msName string) string

EPPSecretName returns the EPP SA token Secret name (DNS-1035 compliant base).

func EPPSelectorLabels ¶

func EPPSelectorLabels(msName string) map[string]string

EPPSelectorLabels returns labels used for EPP pod selection. Uses EPPName(msName) so selector matches the EPP Deployment and its Pod template labels.

func EPPServiceMonitorName ¶

func EPPServiceMonitorName(msName string) string

EPPServiceMonitorName returns the EPP ServiceMonitor name (DNS-1035 compliant).

func EnsureDecodeDeployment ¶

func EnsureDecodeDeployment(
	ctx context.Context,
	c client.Client,
	scheme *runtime.Scheme,
	ms *modelv1alpha1.ModelService,
	version string,
	tracing TracingConfig,
	kitImage string,
) error

EnsureDecodeDeployment creates or updates the decode Deployment.

func EnsureDestinationRule ¶

func EnsureDestinationRule(
	ctx context.Context,
	c client.Client,
	scheme *runtime.Scheme,
	ms *modelv1alpha1.ModelService,
	eppConfig EPPConfig,
	version string,
) error

EnsureDestinationRule creates or updates the Istio DestinationRule for the EPP Service. The rule is ensured whenever the provider is Istio so that the EPP Deployment can connect to its own Service over TLS, regardless of whether spec.inferencePool is set.

func EnsureEPPClusterRBAC ¶

func EnsureEPPClusterRBAC(
	ctx context.Context,
	c client.Client,
	ms *modelv1alpha1.ModelService,
	eppConfig EPPConfig,
	version string,
) error

EnsureEPPClusterRBAC creates or updates the cluster-scoped ClusterRole and ClusterRoleBinding for EPP metrics authentication. Since these are cluster-scoped they cannot carry an OwnerReference; the controller manages cleanup via a Finalizer on the ModelService.

func EnsureEPPConfigMap ¶

func EnsureEPPConfigMap(
	ctx context.Context,
	c client.Client,
	scheme *runtime.Scheme,
	ms *modelv1alpha1.ModelService,
	version string,
) error

EnsureEPPConfigMap creates or updates the EPP ConfigMap. Always ensured so that the EPP Deployment (created with or without spec.inferencePool) can run.

func EnsureEPPDeployment ¶

func EnsureEPPDeployment(
	ctx context.Context,
	c client.Client,
	scheme *runtime.Scheme,
	ms *modelv1alpha1.ModelService,
	eppConfig EPPConfig,
	defaultImages DefaultImages,
	version string,
	configHash string,
) error

EnsureEPPDeployment creates or updates the EPP Deployment. When spec.inferencePool is not set, a default InferencePool is used so the EPP Deployment is always created with the ModelService. The defaultImages parameter supplies operator-level image defaults for the synthetic EPP spec.

func EnsureEPPRBAC ¶

func EnsureEPPRBAC(
	ctx context.Context,
	c client.Client,
	scheme *runtime.Scheme,
	ms *modelv1alpha1.ModelService,
	version string,
) error

EnsureEPPRBAC creates or updates the EPP Role and RoleBinding. Always ensured when the EPP Deployment exists (including default EPP when InferencePool is nil) so the EPP Pod can list/watch inferencepools and pods.

func EnsureEPPSATokenSecret ¶

func EnsureEPPSATokenSecret(
	ctx context.Context,
	c client.Client,
	scheme *runtime.Scheme,
	ms *modelv1alpha1.ModelService,
	eppConfig EPPConfig,
	version string,
) error

EnsureEPPSATokenSecret creates or updates the EPP SA token Secret. The secret is only needed when metrics endpoint authentication is enabled and the provider is not GKE (GKE uses its own PodMonitoring mechanism).

func EnsureEPPService ¶

func EnsureEPPService(
	ctx context.Context,
	c client.Client,
	scheme *runtime.Scheme,
	ms *modelv1alpha1.ModelService,
	version string,
) error

EnsureEPPService creates or updates the EPP Service. Always ensured so it is created together with the EPP Deployment (with or without spec.inferencePool). BuildEPPService is safe when InferencePool is nil (uses default port).

Note: Reconcile runs only when ModelService.spec changes (GenerationChangedPredicate). If the operator was upgraded after the ModelService existed, trigger a reconcile by patching the spec (e.g. kubectl patch modelservice <name> -n <ns> --type=merge -p '{"spec":{"engine":{"port":8000}}}').

func EnsureEPPServiceAccount ¶

func EnsureEPPServiceAccount(
	ctx context.Context,
	c client.Client,
	scheme *runtime.Scheme,
	ms *modelv1alpha1.ModelService,
	version string,
) error

EnsureEPPServiceAccount creates or updates the EPP ServiceAccount. Always ensured so that the EPP Deployment (created with or without spec.inferencePool) can run.

func EnsureEPPServiceMonitor ¶

func EnsureEPPServiceMonitor(
	ctx context.Context,
	c client.Client,
	scheme *runtime.Scheme,
	ms *modelv1alpha1.ModelService,
	eppConfig EPPConfig,
	version string,
) error

EnsureEPPServiceMonitor creates or updates the EPP ServiceMonitor.

func EnsureHTTPRoute ¶

func EnsureHTTPRoute(
	ctx context.Context,
	c client.Client,
	scheme *runtime.Scheme,
	ms *modelv1alpha1.ModelService,
	version string,
	eppConfig EPPConfig,
) error

EnsureHTTPRoute creates or updates the HTTPRoute. When spec.httpRoute is set, the route is built from spec. When spec.httpRoute is nil and eppConfig.DefaultGatewayName is non-empty, a default HTTPRoute is created (attaches to that gateway, backend to the InferencePool). When both are nil/empty, any existing HTTPRoute is cleaned up.

func EnsureInferencePool ¶

func EnsureInferencePool(
	ctx context.Context,
	c client.Client,
	scheme *runtime.Scheme,
	ms *modelv1alpha1.ModelService,
	version string,
) error

EnsureInferencePool creates or updates the InferencePool. When spec.inferencePool is nil, a default InferencePool is created with name InferencePoolName(ms.Name) (no -epp suffix) so it matches the HTTPRoute backend and the EPP --pool-name flag.

func EnsurePodMonitors ¶

func EnsurePodMonitors(
	ctx context.Context,
	c client.Client,
	scheme *runtime.Scheme,
	ms *modelv1alpha1.ModelService,
	version string,
) error

EnsurePodMonitors creates or updates PodMonitors for decode (and optionally prefill).

func EnsurePrefillDeployment ¶

func EnsurePrefillDeployment(
	ctx context.Context,
	c client.Client,
	scheme *runtime.Scheme,
	ms *modelv1alpha1.ModelService,
	version string,
	tracing TracingConfig,
	kitImage string,
) error

EnsurePrefillDeployment creates or updates the prefill Deployment if configured.

func GPUCount ¶

func GPUCount(p modelv1alpha1.ParallelismSpec) int32

GPUCount calculates the number of GPUs required per pod from parallelism settings.

func GPUResourceName ¶

func GPUResourceName(accel modelv1alpha1.AcceleratorType) corev1.ResourceName

GPUResourceName returns the Kubernetes device plugin resource name for an accelerator.

func HTTPRouteLabels ¶

func HTTPRouteLabels(msName, version string) map[string]string

HTTPRouteLabels returns the metadata label set for the HTTPRoute resource. Uses component "epp" so the route is grouped with EPP-related resources.

func HTTPRouteName ¶

func HTTPRouteName(msName string) string

HTTPRouteName returns the HTTPRoute name.

func InferencePoolLabels ¶

func InferencePoolLabels(msName, version string) map[string]string

InferencePoolLabels returns the metadata label set for the InferencePool resource. Uses component "epp" so the pool is grouped with EPP-related resources.

func InferencePoolName ¶

func InferencePoolName(msName string) string

InferencePoolName returns the InferencePool name.

func InferencePoolSelectorLabels ¶

func InferencePoolSelectorLabels(msName string) map[string]string

InferencePoolSelectorLabels returns the label set that InferencePool uses to select serving pods. Uses the common selector (without role) so both decode and prefill pods are included. Includes llm-d.ai/model so the pool selects only pods for this ModelService.

func InjectGPUResources ¶

func InjectGPUResources(res *corev1.ResourceRequirements, accel modelv1alpha1.AcceleratorType, count int32)

InjectGPUResources merges GPU resource limits into the given ResourceRequirements.

func LabelsForRole ¶

func LabelsForRole(msName, component, version string) map[string]string

LabelsForRole returns the full label set for resources of a specific role.

func PluginsConfigFile ¶

func PluginsConfigFile(ms *modelv1alpha1.ModelService) string

PluginsConfigFile returns the plugins config filename based on whether Prefill/Decode disaggregation is enabled.

func PodLabelsForRole ¶

func PodLabelsForRole(msName, component, version, role string) map[string]string

PodLabelsForRole returns labels applied to serving pods, including the llm-d role and llm-d.ai/model (ModelService name). Includes LabelInferenceServing so that spec.selector.matchLabels can select these pods. Only the Deployment's Pod template gets llm-d.ai/model; the Deployment object itself does not.

func PodMonitorName ¶

func PodMonitorName(msName, role string) string

PodMonitorName returns the PodMonitor name for a role.

func PrefillName ¶

func PrefillName(msName string) string

PrefillName returns the Deployment name for the prefill role.

func SanitizeDNS1035Label ¶

func SanitizeDNS1035Label(s string) string

SanitizeDNS1035Label returns a string valid as a DNS-1035 label: lowercase alphanumeric and '-', must start with a letter, must end with an alphanumeric.

func SelectorLabelsForRole ¶

func SelectorLabelsForRole(msName, component, role string) map[string]string

SelectorLabelsForRole returns labels used for Deployment spec.selector.matchLabels (version-independent). Includes llm-d.ai/model and llm-d.ai/role so the selector matches the Pod template labels.

Types ¶

type DefaultImages ¶

type DefaultImages struct {
	// Engine is the default vLLM container image (--default-engine-image).
	Engine string
	// EPP is the default Endpoint Picker image (--default-epp-image).
	EPP string
	// RoutingProxy is the default routing proxy sidecar image (--default-routing-proxy-image).
	RoutingProxy string
}

DefaultImages holds default container images used when the CR does not specify an image. These are set via operator CLI flags so that platform administrators can control image versions at the cluster level without baking them into the CRD schema.

type EPPConfig ¶

type EPPConfig struct {
	// Provider is the infrastructure provider: "istio" (default), "gke", "none".
	// Controls whether Istio DestinationRule or GKE-specific resources are created.
	Provider string

	// Flags are additional command-line flags passed to the EPP container.
	Flags map[string]string

	// MetricsEndpointAuth controls whether the EPP metrics endpoint requires
	// authentication. When false, --metrics-endpoint-auth=false is passed.
	MetricsEndpointAuth bool

	// DefaultGatewayName is the Gateway name used when creating an HTTPRoute
	// and spec.httpRoute is not set. When non-empty, a default HTTPRoute is
	// created (parentRef to this gateway, backend to the InferencePool).
	// When empty, HTTPRoute is only created when spec.httpRoute is set.
	DefaultGatewayName string

	// Tracing holds OpenTelemetry tracing configuration.
	Tracing TracingConfig
}

EPPConfig holds cluster-level EPP settings that are shared across all ModelService instances. Per-ModelService settings (image, replicas, resources, port, failureMode) live in the CRD's EndpointPickerSpec.

type ObservationResult ¶

type ObservationResult struct {
	Phase           modelv1alpha1.ModelServicePhase
	DecodeReady     int32
	DecodeReplicas  int32
	PrefillReady    int32
	PrefillReplicas int32
	Ready           metav1.ConditionStatus
	Reason          string
	Message         string
}

ObservationResult holds the derived status from observing the current cluster state.

func ObserveStatus ¶

func ObserveStatus(
	ctx context.Context,
	c client.Client,
	ms *modelv1alpha1.ModelService,
) (ObservationResult, error)

ObserveStatus reads the current Deployment states and derives the ModelService status.

type TracingConfig ¶

type TracingConfig struct {
	Enabled              bool
	OtelExporterEndpoint string
	Sampler              string
	SamplerArg           string
}

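TracingConfig holds cluster-level tracing settings for the EPP container. It is embedded in EPPConfig and is also accepted by BuildDeployment, EnsureDecodeDeployment, and EnsurePrefillDeployment.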

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL

Documentation ¶

Index ¶

Constants ¶

Variables ¶

Functions ¶

func ApplyImageDefaults ¶

func BuildDefaultHTTPRoute ¶

func BuildDefaultInferencePool ¶

func BuildDeployment ¶

func BuildDestinationRule ¶

func BuildEPPClusterRBAC ¶

func BuildEPPConfigMap ¶

func BuildEPPDeployment ¶

func BuildEPPRBAC ¶

func BuildEPPSATokenSecret ¶

func BuildEPPService ¶

func BuildEPPServiceAccount ¶

func BuildEPPServiceMonitor ¶

func BuildHTTPRoute ¶

func BuildInferencePool ¶

func BuildPodMonitor ¶

func CleanupEPPClusterRBAC ¶

func ConfigMapHash ¶

func DecodeName ¶

func DefaultReplicas ¶

func EPPClusterRBACName ¶

func EPPConfigMapName ¶

func EPPLabels ¶

func EPPName ¶

func EPPNameForService ¶

func EPPSecretName ¶

func EPPSelectorLabels ¶

func EPPServiceMonitorName ¶

func EnsureDecodeDeployment ¶

func EnsureDestinationRule ¶

func EnsureEPPClusterRBAC ¶

func EnsureEPPConfigMap ¶

func EnsureEPPDeployment ¶

func EnsureEPPRBAC ¶

func EnsureEPPSATokenSecret ¶

func EnsureEPPService ¶

func EnsureEPPServiceAccount ¶

func EnsureEPPServiceMonitor ¶

func EnsureHTTPRoute ¶

func EnsureInferencePool ¶

func EnsurePodMonitors ¶

func EnsurePrefillDeployment ¶

func GPUCount ¶

func GPUResourceName ¶

func HTTPRouteLabels ¶

func HTTPRouteName ¶

func InferencePoolLabels ¶

func InferencePoolName ¶

func InferencePoolSelectorLabels ¶

func InjectGPUResources ¶

func LabelsForRole ¶

func PluginsConfigFile ¶

func PodLabelsForRole ¶

func PodMonitorName ¶

func PrefillName ¶

func SanitizeDNS1035Label ¶

func SelectorLabelsForRole ¶

Types ¶

type DefaultImages ¶

type EPPConfig ¶

type ObservationResult ¶

func ObserveStatus ¶

type TracingConfig ¶

Source Files ¶