Documentation
¶
Index ¶
- Constants
- Variables
- func BuildClusterServingRuntime(template aimstate.TemplateState, ownerRef metav1.OwnerReference) *servingv1alpha1.ClusterServingRuntime
- func BuildDerivedTemplate(service *aimv1alpha1.AIMService, templateName string, resolvedModelName string, ...) *aimv1alpha1.AIMServiceTemplate
- func BuildDiscoveryJob(spec DiscoveryJobSpec) *batchv1.Job
- func BuildInferenceService(serviceState aimstate.ServiceState, ownerRef metav1.OwnerReference) *servingv1beta1.InferenceService
- func BuildInferenceServiceHTTPRoute(serviceState aimstate.ServiceState, ownerRef metav1.OwnerReference) *gatewayapiv1.HTTPRoute
- func BuildServingRuntime(template aimstate.TemplateState, ownerRef metav1.OwnerReference) *servingv1alpha1.ServingRuntime
- func BuildServingRuntimeFromState(state aimstate.TemplateState, ownerRef metav1.OwnerReference) *servingv1alpha1.ServingRuntime
- func BuildTemplateStateFromObservation(name, namespace string, specCommon aimv1alpha1.AIMServiceTemplateSpecCommon, ...) aimstate.TemplateState
- func CountActiveDiscoveryJobs(ctx context.Context, k8sClient client.Client) (int, error)
- func DefaultRoutePath(service *aimv1alpha1.AIMService) string
- func DerivedTemplateName(baseName, suffix string) string
- func EvaluateHTTPRouteStatus(route *gatewayapiv1.HTTPRoute) (bool, string, string)
- func EvaluateInferenceServiceStatus(status *aimv1alpha1.AIMServiceStatus, obs *ServiceObservation, ...)
- func EvaluateRoutingStatus(service *aimv1alpha1.AIMService, obs *ServiceObservation, ...) (enabled bool, ready bool, hasFatalError bool)
- func FormatRuntimeConfigSources(resolution *RuntimeConfigResolution, namespaceLabel string) []string
- func GenerateInferenceServiceName(serviceName, namespace string) string
- func GetClusterGPUResources(ctx context.Context, k8sClient client.Client) (map[string]GPUResourceInfo, error)
- func GetClusterServingRuntime(ctx context.Context, k8sClient client.Client, name string) (*servingv1alpha1.ClusterServingRuntime, error)
- func GetDiscoveryJob(ctx context.Context, k8sClient client.Client, namespace, templateName string) (*batchv1.Job, error)
- func GetImageConfigLabels(ctx context.Context, imageURI string, keychain authn.Keychain) (map[string]string, error)
- func GetOperatorNamespace() string
- func GetServingRuntime(ctx context.Context, k8sClient client.Client, namespace, name string) (*servingv1alpha1.ServingRuntime, error)
- func HandleImageMissing(status *aimv1alpha1.AIMServiceStatus, obs *ServiceObservation, ...) bool
- func HandleImageNotReady(status *aimv1alpha1.AIMServiceStatus, obs *ServiceObservation, ...) bool
- func HandleInferenceServicePodImageError(status *aimv1alpha1.AIMServiceStatus, obs *ServiceObservation, ...) bool
- func HandleMissingModelSource(status *aimv1alpha1.AIMServiceStatus, obs *ServiceObservation, ...) bool
- func HandleModelResolutionFailure(status *aimv1alpha1.AIMServiceStatus, obs *ServiceObservation, ...) bool
- func HandlePathTemplateError(status *aimv1alpha1.AIMServiceStatus, service *aimv1alpha1.AIMService, ...) bool
- func HandleReconcileErrors(status *aimv1alpha1.AIMServiceStatus, ...) bool
- func HandleRuntimeConfigMissing(status *aimv1alpha1.AIMServiceStatus, obs *ServiceObservation, ...) bool
- func HandleTemplateDegraded(status *aimv1alpha1.AIMServiceStatus, obs *ServiceObservation, ...) bool
- func HandleTemplateNotAvailable(status *aimv1alpha1.AIMServiceStatus, obs *ServiceObservation, ...) bool
- func HandleTemplateSelectionFailure(status *aimv1alpha1.AIMServiceStatus, obs *ServiceObservation, ...) bool
- func HasOwnerReference(refs []metav1.OwnerReference, uid types.UID) bool
- func InferenceServiceRouteName(serviceName string) string
- func InspectImage(ctx context.Context, imageURI string, ...) (*aimv1alpha1.ImageMetadata, error)
- func IsDerivedTemplate(labels map[string]string) bool
- func IsGPUAvailable(ctx context.Context, k8sClient client.Client, gpuModel string) (bool, error)
- func IsJobComplete(job *batchv1.Job) bool
- func IsJobFailed(job *batchv1.Job) bool
- func IsJobSucceeded(job *batchv1.Job) bool
- func JoinRuntimeConfigSources(resolution *RuntimeConfigResolution, namespaceLabel string) string
- func ListAvailableGPUs(ctx context.Context, k8sClient client.Client) ([]string, error)
- func NodeGPUChangePredicate() predicate.Predicate
- func NormalizeRuntimeConfigName(name string) string
- func ObserveDerivedTemplate(ctx context.Context, k8sClient client.Client, service *aimv1alpha1.AIMService, ...) error
- func ObserveNonDerivedTemplate(ctx context.Context, k8sClient client.Client, service *aimv1alpha1.AIMService, ...) error
- func OverridesSuffix(overrides *aimv1alpha1.AIMServiceOverrides) string
- func PlanImageResources(ctx context.Context, input ImagePlanInput) ([]client.Object, *aimv1alpha1.ImageMetadata, error)
- func PlanTemplateResources(ctx TemplatePlanContext, builders TemplatePlanBuilders) []client.Object
- func PopulateObservationFromClusterTemplate(ctx context.Context, k8sClient client.Client, service *aimv1alpha1.AIMService, ...) error
- func PopulateObservationFromNamespaceTemplate(ctx context.Context, k8sClient client.Client, service *aimv1alpha1.AIMService, ...) error
- func ProjectImageStatus(status *aimv1alpha1.AIMModelStatus, spec aimv1alpha1.AIMModelSpec, ...)
- func ProjectServiceStatus(service *aimv1alpha1.AIMService, obs *ServiceObservation, ...)
- func ProjectTemplateStatus(ctx context.Context, k8sClient client.Client, clientset kubernetes.Interface, ...) error
- func RequestsForServices(services []aimv1alpha1.AIMService) []reconcile.Request
- func ResolveServiceRoutePath(service *aimv1alpha1.AIMService, ...) (string, error)
- func ResolveTemplateNameForService(ctx context.Context, k8sClient client.Client, service *aimv1alpha1.AIMService) (TemplateResolution, TemplateSelectionStatus, error)
- func RuntimeConfigNameForService(service *aimv1alpha1.AIMService, ...) string
- func TemplateNameFromSpec(service *aimv1alpha1.AIMService) string
- func TemplateRequiresGPU(spec aimv1alpha1.AIMServiceTemplateSpecCommon) bool
- func UpdateTemplateGPUAvailability(ctx context.Context, k8sClient client.Client, ...) error
- type DiscoveryJobSpec
- type GPUResourceInfo
- type ImageLookupResult
- type ImageObservation
- type ImageObservationOptions
- type ImagePlanInput
- type ImagePullError
- type ImagePullErrorType
- type ImageRegistryError
- type MetadataFormatError
- type ModelReference
- type ParsedDiscovery
- type RuntimeConfigResolution
- type RuntimeObservation
- type ServiceObservation
- type TemplateCandidate
- type TemplateObservation
- type TemplateObservationOptions
- type TemplatePlanBuilders
- type TemplatePlanContext
- type TemplatePlanInput
- type TemplateResolution
- type TemplateScope
- func LoadBaseTemplateSpec(ctx context.Context, k8sClient client.Client, service *aimv1alpha1.AIMService, ...) (*aimv1alpha1.AIMServiceTemplateSpec, TemplateScope, error)
- func ResolveOrCreateModelFromImage(ctx context.Context, k8sClient client.Client, serviceNamespace string, ...) (modelName string, scope TemplateScope, err error)
- type TemplateSelectionStatus
- type TemplateSpec
- type TemplateWithStatus
Constants ¶
const ( // DefaultRuntimeConfigName is the name of the default AIM runtime config DefaultRuntimeConfigName = "default" // MaxConcurrentDiscoveryJobs is the global limit for concurrent discovery jobs across all namespaces MaxConcurrentDiscoveryJobs = 10 // AimLabelDomain is the base domain used for AIM-specific labels. AimLabelDomain = "aim.silogen.ai" // AIM label keys. LabelKeyTemplate = AimLabelDomain + "/template" LabelKeyModelID = AimLabelDomain + "/model-id" LabelKeyDerivedTemplate = AimLabelDomain + "/derived-template" LabelKeyAutoGenerated = AimLabelDomain + "/auto-generated" LabelKeyImageName = AimLabelDomain + "/aim-image" LabelKeyMetric = AimLabelDomain + "/template.metric" LabelKeyPrecision = AimLabelDomain + "/template.precision" LabelKeyServiceName = AimLabelDomain + "/service-name" // AIM label values. LabelValueRuntimeName = "aim-runtime" LabelValueRuntimeComponent = "serving-runtime" LabelValueManagedBy = "aim-controller" LabelValueDiscoveryName = "aim-discovery" LabelValueDiscoveryComponent = "discovery-job" LabelValueServiceName = "aim-service" LabelValueServiceComponent = "inference-service" LabelValueDerivedTemplate = "true" LabelValueAutoGenerated = "true" )
const ( // DiscoveryJobBackoffLimit is the number of retries before marking the discovery job as failed DiscoveryJobBackoffLimit = 3 // DiscoveryJobTTLSeconds defines how long completed discovery jobs persist // before automatic cleanup. This allows time for status inspection and log retrieval. DiscoveryJobTTLSeconds = 60 )
const ( // DefaultGPUResourceName is the default resource name for AMD GPUs in Kubernetes. DefaultGPUResourceName = "amd.com/gpu" // DefaultSharedMemorySize is the default shared memory size. Shared memory is required for efficient inter-process communication in model serving workloads. DefaultSharedMemorySize = "8Gi" // KubernetesLabelValueMaxLength is the maximum length for a Kubernetes label value. KubernetesLabelValueMaxLength = 63 )
const (
// LabelAutoCreated marks models that were automatically created from service image references
LabelAutoCreated = "aim.silogen.ai/auto-created"
)
const ( // MaxRoutePathLength is the maximum allowed length for a route path. // This prevents excessively long paths that could cause issues with gateways or proxies. MaxRoutePathLength = 200 )
Variables ¶
var ErrImageNotFound = errors.New("image not found in catalog")
ErrImageNotFound is returned when an image is not found in the catalog.
var ( // ErrMultipleModelsFound is returned when multiple models exist with the same image URI ErrMultipleModelsFound = errors.New("multiple models found with the same image") )
var ErrRuntimeConfigNotFound = errors.New("runtime config not found")
ErrRuntimeConfigNotFound indicates that neither namespace nor cluster runtime config could be located.
var GPUPreferenceOrder = []string{
"MI325X",
"MI300X",
"MI250X",
"MI210",
"A100",
"H100",
}
GPUPreferenceOrder defines the preference order for GPU models when selecting templates. GPUs earlier in the list are preferred over later ones. TODO: Fill in the complete preference order based on performance characteristics.
var MetricPreferenceOrder = []string{
"latency",
"throughput",
}
MetricPreferenceOrder defines preference for optimization metrics. "latency" is preferred over "throughput" by default.
var PrecisionPreferenceOrder = []string{
"fp8",
"fp16",
"bf16",
"fp32",
}
PrecisionPreferenceOrder defines preference for precision levels. Lower precision (more optimized) is preferred.
Functions ¶
func BuildClusterServingRuntime ¶
func BuildClusterServingRuntime(template aimstate.TemplateState, ownerRef metav1.OwnerReference) *servingv1alpha1.ClusterServingRuntime
BuildClusterServingRuntime creates a KServe ClusterServingRuntime for a cluster-scoped template.
func BuildDerivedTemplate ¶
func BuildDerivedTemplate( service *aimv1alpha1.AIMService, templateName string, resolvedModelName string, baseSpec *aimv1alpha1.AIMServiceTemplateSpec, ) *aimv1alpha1.AIMServiceTemplate
BuildDerivedTemplate constructs an AIMServiceTemplate for a service with overrides. The template inherits from the base spec and applies service-specific customizations.
func BuildDiscoveryJob ¶
func BuildDiscoveryJob(spec DiscoveryJobSpec) *batchv1.Job
BuildDiscoveryJob creates a Job that performs a model discovery dry run.
func BuildInferenceService ¶
func BuildInferenceService(serviceState aimstate.ServiceState, ownerRef metav1.OwnerReference) *servingv1beta1.InferenceService
BuildInferenceService constructs a KServe InferenceService referencing a ServingRuntime or ClusterServingRuntime.
func BuildInferenceServiceHTTPRoute ¶
func BuildInferenceServiceHTTPRoute(serviceState aimstate.ServiceState, ownerRef metav1.OwnerReference) *gatewayapiv1.HTTPRoute
BuildInferenceServiceHTTPRoute creates an HTTPRoute that exposes the predictor service via the provided gateway parent.
func BuildServingRuntime ¶
func BuildServingRuntime(template aimstate.TemplateState, ownerRef metav1.OwnerReference) *servingv1alpha1.ServingRuntime
BuildServingRuntime creates a KServe ServingRuntime for a namespace-scoped template.
func BuildServingRuntimeFromState ¶
func BuildServingRuntimeFromState(state aimstate.TemplateState, ownerRef metav1.OwnerReference) *servingv1alpha1.ServingRuntime
BuildServingRuntimeFromState constructs a namespaced ServingRuntime from a TemplateState snapshot. This is an adapter function that maintains compatibility with the original signature.
func BuildTemplateStateFromObservation ¶
func BuildTemplateStateFromObservation( name, namespace string, specCommon aimv1alpha1.AIMServiceTemplateSpecCommon, observation *TemplateObservation, runtimeConfigSpec aimv1alpha1.AIMRuntimeConfigSpec, status *aimv1alpha1.AIMServiceTemplateStatus, ) aimstate.TemplateState
BuildTemplateStateFromObservation constructs a TemplateState from the template specification, observation, and status. This is an adapter function that combines template metadata with observed resources.
func CountActiveDiscoveryJobs ¶
CountActiveDiscoveryJobs counts the number of active (non-complete) discovery jobs across all namespaces. A job is considered active if it exists and is not in a complete state (succeeded or failed).
func DefaultRoutePath ¶
func DefaultRoutePath(service *aimv1alpha1.AIMService) string
DefaultRoutePath returns the default HTTP route prefix.
func DerivedTemplateName ¶
DerivedTemplateName constructs a template name from a base name and suffix. Ensures the final name does not exceed Kubernetes name length limits.
func EvaluateHTTPRouteStatus ¶
func EvaluateHTTPRouteStatus(route *gatewayapiv1.HTTPRoute) (bool, string, string)
EvaluateHTTPRouteStatus checks the HTTPRoute status and returns readiness state.
func EvaluateInferenceServiceStatus ¶
func EvaluateInferenceServiceStatus( status *aimv1alpha1.AIMServiceStatus, obs *ServiceObservation, inferenceService *servingv1beta1.InferenceService, httpRoute *gatewayapiv1.HTTPRoute, routingEnabled bool, routingReady bool, setCondition func(conditionType string, conditionStatus metav1.ConditionStatus, reason, message string), )
EvaluateInferenceServiceStatus checks InferenceService and routing readiness. Updates status conditions based on the InferenceService and routing state.
func EvaluateRoutingStatus ¶
func EvaluateRoutingStatus( service *aimv1alpha1.AIMService, obs *ServiceObservation, status *aimv1alpha1.AIMServiceStatus, setCondition func(conditionType string, conditionStatus metav1.ConditionStatus, reason, message string), ) (enabled bool, ready bool, hasFatalError bool)
EvaluateRoutingStatus checks routing configuration and updates status accordingly. Returns (enabled, ready, hasFatalError) to indicate if routing is enabled, if it's ready, and if there's a terminal error.
func FormatRuntimeConfigSources ¶
func FormatRuntimeConfigSources(resolution *RuntimeConfigResolution, namespaceLabel string) []string
FormatRuntimeConfigSources renders a human-readable list of runtime config sources for logging/events.
func GenerateInferenceServiceName ¶
GenerateInferenceServiceName creates a KServe InferenceService name that fits DNS label constraints. KServe creates hostnames in the format {isvc-name}-predictor-{namespace}. These hostnames must be ≤ 63 characters to comply with DNS label limits.
If the original name would exceed the limit, this function:
1. Truncates the base name.
2. Appends an 8-character hash of the full original name.
3. Ensures the result is RFC1123 compliant.
The hash ensures uniqueness while keeping names deterministic and short.
func GetClusterGPUResources ¶
func GetClusterGPUResources(ctx context.Context, k8sClient client.Client) (map[string]GPUResourceInfo, error)
GetClusterGPUResources returns an aggregated view of all GPU resources in the cluster. It scans all nodes and aggregates resources that start with "amd.com/" or "nvidia.com/". Returns a map where keys are GPU models (e.g., "MI300X", "A100") extracted from node labels, and values contain the resource name.
func GetClusterServingRuntime ¶
func GetClusterServingRuntime(ctx context.Context, k8sClient client.Client, name string) (*servingv1alpha1.ClusterServingRuntime, error)
GetClusterServingRuntime fetches a ClusterServingRuntime by name.
func GetDiscoveryJob ¶
func GetDiscoveryJob(ctx context.Context, k8sClient client.Client, namespace, templateName string) (*batchv1.Job, error)
GetDiscoveryJob fetches the discovery job for a template. Returns the newest job (by CreationTimestamp) if multiple exist.
func GetImageConfigLabels ¶
func GetImageConfigLabels(ctx context.Context, imageURI string, keychain authn.Keychain) (map[string]string, error)
GetImageConfigLabels is a helper function that retrieves just the labels from an image without parsing them into structured metadata. Useful for debugging.
func GetOperatorNamespace ¶
func GetOperatorNamespace() string
GetOperatorNamespace returns the namespace where the AIM operator runs. It reads the AIM_OPERATOR_NAMESPACE environment variable; if unset, it defaults to "kaiwo-system".
func GetServingRuntime ¶
func GetServingRuntime(ctx context.Context, k8sClient client.Client, namespace, name string) (*servingv1alpha1.ServingRuntime, error)
GetServingRuntime fetches a ServingRuntime by namespace and name.
func HandleImageMissing ¶
func HandleImageMissing( status *aimv1alpha1.AIMServiceStatus, obs *ServiceObservation, setCondition func(conditionType string, conditionStatus metav1.ConditionStatus, reason, message string), ) bool
HandleImageMissing checks for missing image and updates status. Returns true if the image is missing.
func HandleImageNotReady ¶
func HandleImageNotReady( status *aimv1alpha1.AIMServiceStatus, obs *ServiceObservation, setCondition func(conditionType string, conditionStatus metav1.ConditionStatus, reason, message string), ) bool
HandleImageNotReady checks if the resolved image is not yet ready and updates status. Returns true if the service should wait for the image to become ready.
func HandleInferenceServicePodImageError ¶
func HandleInferenceServicePodImageError( status *aimv1alpha1.AIMServiceStatus, obs *ServiceObservation, setCondition func(conditionType string, conditionStatus metav1.ConditionStatus, reason, message string), ) bool
HandleInferenceServicePodImageError checks for image pull errors in InferenceService pods. Returns true if an image pull error was detected.
func HandleMissingModelSource ¶
func HandleMissingModelSource( status *aimv1alpha1.AIMServiceStatus, obs *ServiceObservation, setCondition func(conditionType string, conditionStatus metav1.ConditionStatus, reason, message string), ) bool
HandleMissingModelSource checks if the template is available but has no model sources. Returns true if model sources are missing (discovery succeeded but produced no usable sources).
func HandleModelResolutionFailure ¶
func HandleModelResolutionFailure( status *aimv1alpha1.AIMServiceStatus, obs *ServiceObservation, setCondition func(conditionType string, conditionStatus metav1.ConditionStatus, reason, message string), ) bool
HandleModelResolutionFailure checks for model resolution failures and updates status. Returns true if model resolution failed.
func HandlePathTemplateError ¶
func HandlePathTemplateError( status *aimv1alpha1.AIMServiceStatus, service *aimv1alpha1.AIMService, obs *ServiceObservation, setCondition func(conditionType string, conditionStatus metav1.ConditionStatus, reason, message string), ) bool
HandlePathTemplateError checks for path template errors and updates status. Returns true if there is a path template error. This can occur when routing is enabled (via service spec or runtime config) but the path template is invalid.
func HandleReconcileErrors ¶
func HandleReconcileErrors( status *aimv1alpha1.AIMServiceStatus, setCondition func(conditionType string, conditionStatus metav1.ConditionStatus, reason, message string), errs controllerutils.ReconcileErrors, ) bool
HandleReconcileErrors processes reconciliation errors and updates service status. Returns true if errors were found and handled.
func HandleRuntimeConfigMissing ¶
func HandleRuntimeConfigMissing( status *aimv1alpha1.AIMServiceStatus, obs *ServiceObservation, setCondition func(conditionType string, conditionStatus metav1.ConditionStatus, reason, message string), ) bool
HandleRuntimeConfigMissing checks for missing runtime config and updates status. Returns true if the runtime config is missing.
func HandleTemplateDegraded ¶
func HandleTemplateDegraded( status *aimv1alpha1.AIMServiceStatus, obs *ServiceObservation, setCondition func(conditionType string, conditionStatus metav1.ConditionStatus, reason, message string), ) bool
HandleTemplateDegraded checks if the template is degraded, not available, or failed and updates status. Returns true if the template is degraded, not available, or failed.
func HandleTemplateNotAvailable ¶
func HandleTemplateNotAvailable( status *aimv1alpha1.AIMServiceStatus, obs *ServiceObservation, setCondition func(conditionType string, conditionStatus metav1.ConditionStatus, reason, message string), ) bool
HandleTemplateNotAvailable checks if the template is not available and updates status. Returns true if the template is not yet available (Pending or Progressing). Sets the service to Pending state because it's waiting for a dependency (the template).
func HandleTemplateSelectionFailure ¶
func HandleTemplateSelectionFailure( status *aimv1alpha1.AIMServiceStatus, obs *ServiceObservation, setCondition func(conditionType string, conditionStatus metav1.ConditionStatus, reason, message string), ) bool
HandleTemplateSelectionFailure reports failures during automatic template selection.
func HasOwnerReference ¶
func HasOwnerReference(refs []metav1.OwnerReference, uid types.UID) bool
HasOwnerReference checks if the given UID exists in the owner references list.
func InferenceServiceRouteName ¶
InferenceServiceRouteName returns the canonical HTTPRoute name for an InferenceService.
func InspectImage ¶
func InspectImage( ctx context.Context, imageURI string, imagePullSecrets []corev1.LocalObjectReference, clientset kubernetes.Interface, namespace string, ) (*aimv1alpha1.ImageMetadata, error)
InspectImage extracts metadata from a container image using the provided image pull secrets. It uses go-containerregistry to authenticate and fetch image labels, then parses them into the ImageMetadata structure.
Parameters:
- ctx: Context for the operation
- imageURI: Full container image reference (e.g., "registry.example.com/repo/image:tag")
- imagePullSecrets: Kubernetes image pull secrets for authentication
- clientset: Kubernetes clientset for accessing secrets
- namespace: Namespace where the secrets are located
Returns:
- *ImageMetadata: Extracted metadata if successful
- error: Any error encountered during inspection (authentication, network, parsing, etc.). Registry access errors are wrapped in ImageRegistryError for categorization.
func IsDerivedTemplate ¶
IsDerivedTemplate returns true when the provided labels indicate a controller-managed derived template.
func IsGPUAvailable ¶
IsGPUAvailable checks if a specific GPU model is available in the cluster. The gpuModel parameter should be the GPU model name (e.g., "MI300X", "A100"), not the resource name. The input is normalized to handle variants like "MI300X (rev 2)" or "Instinct MI300X".
func IsJobComplete ¶
IsJobComplete returns true if the job has completed (either succeeded or failed).
func IsJobFailed ¶
IsJobFailed returns true if the job failed.
func IsJobSucceeded ¶
IsJobSucceeded returns true if the job completed successfully.
func JoinRuntimeConfigSources ¶
func JoinRuntimeConfigSources(resolution *RuntimeConfigResolution, namespaceLabel string) string
JoinRuntimeConfigSources joins runtime config sources for concise logging.
func ListAvailableGPUs ¶
ListAvailableGPUs returns a list of all GPU resource types available in the cluster.
func NodeGPUChangePredicate ¶
NodeGPUChangePredicate returns a predicate that triggers reconciles when GPU-related node attributes change.
func NormalizeRuntimeConfigName ¶
NormalizeRuntimeConfigName returns the effective name to use for lookups when the user omits the field.
func ObserveDerivedTemplate ¶
func ObserveDerivedTemplate( ctx context.Context, k8sClient client.Client, service *aimv1alpha1.AIMService, resolution TemplateResolution, obs *ServiceObservation, ) error
ObserveDerivedTemplate handles observation for services with derived templates. It fetches the derived template if it exists, or loads the base template spec for creation.
func ObserveNonDerivedTemplate ¶
func ObserveNonDerivedTemplate( ctx context.Context, k8sClient client.Client, service *aimv1alpha1.AIMService, templateName string, preferredScope TemplateScope, obs *ServiceObservation, ) error
ObserveNonDerivedTemplate handles observation for services with non-derived templates. It searches for namespace-scoped templates first, then falls back to cluster-scoped templates. Does not set ShouldCreateTemplate - that decision is made in the controller based on whether an explicit templateRef was provided.
func OverridesSuffix ¶
func OverridesSuffix(overrides *aimv1alpha1.AIMServiceOverrides) string
OverridesSuffix computes a hash suffix for service overrides.
func PlanImageResources ¶
func PlanImageResources(ctx context.Context, input ImagePlanInput) ([]client.Object, *aimv1alpha1.ImageMetadata, error)
PlanImageResources plans the desired state for an image resource. It performs metadata extraction if needed and creates ServiceTemplates based on recommendedDeployments.
func PlanTemplateResources ¶
func PlanTemplateResources(ctx TemplatePlanContext, builders TemplatePlanBuilders) []client.Object
PlanTemplateResources produces desired objects based on the observation and controller-provided builders. It respects the global limit on concurrent discovery jobs (MaxConcurrentDiscoveryJobs).
func PopulateObservationFromClusterTemplate ¶
func PopulateObservationFromClusterTemplate( ctx context.Context, k8sClient client.Client, service *aimv1alpha1.AIMService, template *aimv1alpha1.AIMClusterServiceTemplate, obs *ServiceObservation, ) error
PopulateObservationFromClusterTemplate extracts data from a cluster-scoped template into the observation.
func PopulateObservationFromNamespaceTemplate ¶
func PopulateObservationFromNamespaceTemplate( ctx context.Context, k8sClient client.Client, service *aimv1alpha1.AIMService, template *aimv1alpha1.AIMServiceTemplate, obs *ServiceObservation, ) error
PopulateObservationFromNamespaceTemplate extracts data from a namespace-scoped template into the observation.
func ProjectImageStatus ¶
func ProjectImageStatus( status *aimv1alpha1.AIMModelStatus, spec aimv1alpha1.AIMModelSpec, observation *ImageObservation, extractedMetadata *aimv1alpha1.ImageMetadata, extractionErr error, observedGeneration int64, )
ProjectImageStatus updates the status of an image resource based on observation and errors.
func ProjectServiceStatus ¶
func ProjectServiceStatus( service *aimv1alpha1.AIMService, obs *ServiceObservation, inferenceService *servingv1beta1.InferenceService, httpRoute *gatewayapiv1.HTTPRoute, errs controllerutils.ReconcileErrors, )
ProjectServiceStatus computes and updates the service status based on observations and errors. This is a high-level orchestrator that calls the individual status handler functions.
func ProjectTemplateStatus ¶
func ProjectTemplateStatus( ctx context.Context, k8sClient client.Client, clientset kubernetes.Interface, recorder record.EventRecorder, template TemplateWithStatus, obs *TemplateObservation, errs controllerutils.ReconcileErrors, imageNotFoundMessage string, ) error
ProjectTemplateStatus computes status from observation and errors. This is shared between cluster and namespace-scoped template controllers. Modifies the status of the provided template directly and emits events for discovery phase changes.
func RequestsForServices ¶
func RequestsForServices(services []aimv1alpha1.AIMService) []reconcile.Request
RequestsForServices converts a list of AIMServices to reconcile requests.
func ResolveServiceRoutePath ¶
func ResolveServiceRoutePath(service *aimv1alpha1.AIMService, runtimeConfig aimv1alpha1.AIMRuntimeConfigSpec) (string, error)
ResolveServiceRoutePath renders the HTTP route prefix using service and runtime config context.
func ResolveTemplateNameForService ¶
func ResolveTemplateNameForService( ctx context.Context, k8sClient client.Client, service *aimv1alpha1.AIMService, ) (TemplateResolution, TemplateSelectionStatus, error)
ResolveTemplateNameForService determines the template name to use for a service. It handles default template lookup, base template resolution, and derived template naming. Returns an empty BaseName/FinalName if no template can be resolved, which indicates the service should enter a degraded state.
func RuntimeConfigNameForService ¶
func RuntimeConfigNameForService(service *aimv1alpha1.AIMService, templateSpec aimv1alpha1.AIMServiceTemplateSpecCommon) string
RuntimeConfigNameForService determines the effective runtime config name for a service.
func TemplateNameFromSpec ¶
func TemplateNameFromSpec(service *aimv1alpha1.AIMService) string
TemplateNameFromSpec returns the template name from the service spec or status. Falls back to service name if no template reference is found.
func TemplateRequiresGPU ¶
func TemplateRequiresGPU(spec aimv1alpha1.AIMServiceTemplateSpecCommon) bool
TemplateRequiresGPU returns true if the template spec declares a GPU selector with a model.
func UpdateTemplateGPUAvailability ¶
func UpdateTemplateGPUAvailability( ctx context.Context, k8sClient client.Client, spec aimv1alpha1.AIMServiceTemplateSpecCommon, obs *TemplateObservation, ) error
UpdateTemplateGPUAvailability checks whether the GPU model declared by the template exists in the cluster. It updates the provided TemplateObservation with the result of the check. The GPU model is normalized to ensure consistent matching across different label formats.
Types ¶
type DiscoveryJobSpec ¶
type DiscoveryJobSpec struct {
TemplateName string
TemplateSpec aimv1alpha1.AIMServiceTemplateSpecCommon
Namespace string
ModelID string
Image string
Env []corev1.EnvVar
ImagePullSecrets []corev1.LocalObjectReference
ServiceAccount string
OwnerRef metav1.OwnerReference
}
DiscoveryJobSpec defines parameters for creating a discovery job.
type GPUResourceInfo ¶
type GPUResourceInfo struct {
// ResourceName is the full Kubernetes resource name (e.g., "amd.com/gpu").
ResourceName string
}
GPUResourceInfo contains GPU resource information for a specific GPU model.
type ImageLookupResult ¶
type ImageLookupResult struct {
Image string
Resources corev1.ResourceRequirements
}
ImageLookupResult captures the resolved image metadata from the catalog.
func LookupImageForClusterTemplate ¶
func LookupImageForClusterTemplate(ctx context.Context, k8sClient client.Client, modelName string) (*ImageLookupResult, error)
LookupImageForClusterTemplate looks up the container image for a cluster-scoped template. It searches only in AIMClusterModel resources. Returns ErrImageNotFound if no image is found in the catalog.
func LookupImageForNamespaceTemplate ¶
func LookupImageForNamespaceTemplate(ctx context.Context, k8sClient client.Client, namespace, modelName string) (*ImageLookupResult, error)
LookupImageForNamespaceTemplate looks up the container image for a namespace-scoped template. It searches AIMModel resources in the specified namespace first, then falls back to cluster-scoped AIMClusterModel resources. Returns ErrImageNotFound if no image is found in either location.
func (*ImageLookupResult) DeepCopy ¶
func (r *ImageLookupResult) DeepCopy() *ImageLookupResult
DeepCopy returns a deep copy of the ImageLookupResult.
type ImageObservation ¶
type ImageObservation struct {
// MetadataAlreadyAttempted is true if we've already attempted metadata extraction.
MetadataAlreadyAttempted bool
// MetadataExtracted is true if metadata was successfully extracted.
MetadataExtracted bool
// ImageMetadata contains the extracted metadata (if extraction succeeded).
ImageMetadata *aimv1alpha1.ImageMetadata
// RuntimeConfigResolution contains the resolved runtime config (for image pull secrets).
RuntimeConfigResolution *RuntimeConfigResolution
// ExistingTemplates are the ServiceTemplates currently owned by this image.
ExistingTemplates []client.Object
// DiscoveryEnabled reflects whether discovery is enabled from runtime config.
// Discovery is now always attempted unless disabled by runtime config.
DiscoveryEnabled bool
// MetadataError captures the latest metadata format issue encountered during extraction.
MetadataError *MetadataFormatError
// RegistryError captures categorized registry access errors (auth, not-found, etc.).
RegistryError *ImageRegistryError
// MetadataExtractionErr captures non-format extraction failures (e.g., registry or auth errors).
MetadataExtractionErr error
// TemplatesAutoGenerated tracks whether auto-generated templates were requested this cycle.
TemplatesAutoGenerated bool
}
ImageObservation holds the observed state for an AIMModel or AIMClusterModel.
func ObserveImage ¶
func ObserveImage(ctx context.Context, opts ImageObservationOptions) (*ImageObservation, error)
ObserveImage gathers the current state for an image resource.
type ImageObservationOptions ¶
type ImageObservationOptions struct {
// GetRuntimeConfig returns the runtime config for this scope (namespace or cluster).
GetRuntimeConfig func(ctx context.Context) (*RuntimeConfigResolution, error)
// ListOwnedTemplates returns templates owned by this image.
ListOwnedTemplates func(ctx context.Context) ([]client.Object, error)
// GetCurrentStatus returns the current status to check for existing conditions.
GetCurrentStatus func() *aimv1alpha1.AIMModelStatus
// GetImageSpec returns the image spec.
GetImageSpec func() aimv1alpha1.AIMModelSpec
}
ImageObservationOptions provides callbacks for observing image state.
type ImagePlanInput ¶
type ImagePlanInput struct {
// ImageName is the name of the image resource.
ImageName string
// Namespace is the namespace (empty for cluster-scoped).
Namespace string
// ImageSpec is the image specification.
ImageSpec aimv1alpha1.AIMModelSpec
// Observation is the observed state.
Observation *ImageObservation
// OwnerReference for created templates.
OwnerReference []metav1.OwnerReference
// Clientset for image inspection.
Clientset kubernetes.Interface
// IsClusterScoped indicates if this is a cluster-scoped image.
IsClusterScoped bool
}
ImagePlanInput provides the input for planning image resources.
type ImagePullError ¶
type ImagePullError struct {
Type ImagePullErrorType
Container string
Reason string // e.g., "ImagePullBackOff", "ErrImagePull"
Message string // Full error message from Kubernetes
IsInitContainer bool
}
ImagePullError contains categorized information about an image pull failure.
func CheckInferenceServicePodImagePullStatus ¶
func CheckInferenceServicePodImagePullStatus(ctx context.Context, k8sClient client.Client, inferenceServiceName, namespace string) *ImagePullError
CheckInferenceServicePodImagePullStatus checks if an InferenceService's pods are stuck in ImagePullBackOff or ErrImagePull state. It looks for pods with the isvc.serving.kserve.io/inferenceservice label matching the InferenceService name. Returns the image pull error details if found, or nil otherwise.
type ImagePullErrorType ¶
type ImagePullErrorType string
ImagePullErrorType categorizes image pull errors.
const ( ImagePullErrorAuth ImagePullErrorType = "auth" ImagePullErrorNotFound ImagePullErrorType = "not-found" ImagePullErrorGeneric ImagePullErrorType = "generic" )
type ImageRegistryError ¶
type ImageRegistryError struct {
Type ImagePullErrorType // From template.go
Message string
Cause error
}
ImageRegistryError wraps a registry access error and categorizes it by ImagePullErrorType.
func (*ImageRegistryError) Error ¶
func (e *ImageRegistryError) Error() string
func (*ImageRegistryError) Unwrap ¶
func (e *ImageRegistryError) Unwrap() error
type MetadataFormatError ¶
MetadataFormatError indicates the image metadata is malformed and cannot be processed.
func (*MetadataFormatError) Error ¶
func (e *MetadataFormatError) Error() string
type ModelReference ¶
type ModelReference struct {
Name string
Scope TemplateScope
}
ModelReference identifies a model that was found, by its name and scope.
type ParsedDiscovery ¶
type ParsedDiscovery struct {
ModelSources []aimv1alpha1.AIMModelSource
Profile *aimv1alpha1.AIMProfile
}
ParsedDiscovery holds the parsed discovery result: the model sources and the profile.
func ParseDiscoveryLogs ¶
func ParseDiscoveryLogs(ctx context.Context, k8sClient client.Client, clientset kubernetes.Interface, job *batchv1.Job) (*ParsedDiscovery, error)
ParseDiscoveryLogs parses the discovery job output to extract model sources and profile. Reads pod logs from the completed job and parses the JSON output.
type RuntimeConfigResolution ¶
type RuntimeConfigResolution struct {
// Name is the runtime config name requested by the consumer.
Name string
// Namespace is the consumer namespace used when searching for AIMRuntimeConfig.
Namespace string
ClusterConfig *aimv1alpha1.AIMClusterRuntimeConfig
NamespaceConfig *aimv1alpha1.AIMRuntimeConfig
ClusterConfigNotFound bool
NamespaceConfigNotFound bool
EffectiveSpec aimv1alpha1.AIMRuntimeConfigSpec
ResolvedRef *aimv1alpha1.AIMResolvedRuntimeConfig
}
RuntimeConfigResolution captures the resolved runtime configuration. When both namespace and cluster configs exist, they are merged with namespace config taking precedence.
func ResolveRuntimeConfig ¶
func ResolveRuntimeConfig(ctx context.Context, k8sClient client.Client, namespace, configName string) (*RuntimeConfigResolution, error)
ResolveRuntimeConfig resolves runtime config with field-level merging. When both cluster and namespace configs exist, cluster config is used as base and namespace config fields override/merge on top. When configName is empty, the default runtime config name is used.
type RuntimeObservation ¶
type RuntimeObservation[R client.Object] struct { Runtime R TemplateObservation }
RuntimeObservation combines TemplateObservation with a controller-specific runtime object.
func ObserveTemplate ¶
func ObserveTemplate[R client.Object](ctx context.Context, opts TemplateObservationOptions[R]) (*RuntimeObservation[R], error)
ObserveTemplate gathers runtime, discovery job, image, and runtime config information with common error handling.
type ServiceObservation ¶
type ServiceObservation struct {
TemplateName string
BaseTemplateName string
Scope TemplateScope
AutoSelectedTemplate bool
TemplateAvailable bool
TemplateOwnedByService bool
ShouldCreateTemplate bool
RuntimeConfigSpec aimv1alpha1.AIMRuntimeConfigSpec
ResolvedRuntimeConfig *aimv1alpha1.AIMResolvedRuntimeConfig
ResolvedImage *aimv1alpha1.AIMResolvedReference
RoutePath string
PathTemplateErr error
RuntimeConfigErr error
ImageErr error
ModelResolutionErr error
TemplateStatus *aimv1alpha1.AIMServiceTemplateStatus
TemplateSpecCommon aimv1alpha1.AIMServiceTemplateSpecCommon
TemplateSpec *aimv1alpha1.AIMServiceTemplateSpec
TemplateNamespace string
ImageResources *corev1.ResourceRequirements
TemplateSelectionReason string
TemplateSelectionMessage string
TemplateSelectionCount int
TemplatesExistButNotReady bool // True when templates exist but aren't Available yet
ImageReady bool
ImageReadyReason string
ImageReadyMessage string
InferenceServicePodImageError *ImagePullError // Categorized image pull error from InferenceService pods
TemplateCache *aimv1alpha1.AIMTemplateCache
ModelCaches *aimv1alpha1.AIMModelCacheList
}
ServiceObservation holds observed state for an AIMService reconciliation.
func (*ServiceObservation) RuntimeName ¶
func (o *ServiceObservation) RuntimeName() string
RuntimeName returns the effective runtime name for the service.
func (*ServiceObservation) TemplateFound ¶
func (o *ServiceObservation) TemplateFound() bool
TemplateFound returns true if a template was resolved (namespace or cluster scope).
type TemplateCandidate ¶
type TemplateCandidate struct {
Name string
Namespace string
Scope TemplateScope
Spec aimv1alpha1.AIMServiceTemplateSpecCommon
Status aimv1alpha1.AIMServiceTemplateStatus
}
TemplateCandidate captures the information needed to evaluate a template during selection.
func SelectBestTemplate ¶
func SelectBestTemplate( candidates []TemplateCandidate, overrides *aimv1alpha1.AIMServiceOverrides, availableGPUs []string, ) (*TemplateCandidate, int)
SelectBestTemplate selects the best template candidate from the provided list. The heuristic is:
1. Consider only templates that are Available.
2. Filter by service overrides when provided.
3. Filter by GPUs that exist in the cluster.
4. Prefer namespace-scoped templates over cluster-scoped templates.
5. Prefer higher-tier GPUs, then latency over throughput, then lower precision.
Returns (selected template, count of templates with identical preference scores). If count > 1, the templates are ambiguous (identical in all preference dimensions).
func (TemplateCandidate) QualifiedName ¶
func (c TemplateCandidate) QualifiedName() string
QualifiedName returns a human-readable identifier for logging/debugging.
type TemplateObservation ¶
type TemplateObservation struct {
Job *batchv1.Job
Image string
ImageResources *corev1.ResourceRequirements
ImagePullSecrets []corev1.LocalObjectReference
ServiceAccountName string
RuntimeConfig *RuntimeConfigResolution
TemplateCaches *aimv1alpha1.AIMTemplateCacheList
GPUModel string
GPUAvailable bool
GPUChecked bool
JobPodImageError *ImagePullError // Categorized image pull error if job pod is stuck
}
TemplateObservation holds the common observed state for both template types.
type TemplateObservationOptions ¶
type TemplateObservationOptions[R client.Object] struct { K8sClient client.Client // Required for pod status checking GetRuntime func(ctx context.Context) (R, error) ShouldCheckDiscoveryJob bool GetDiscoveryJob func(ctx context.Context) (*batchv1.Job, error) GetJobNamespace func() string // Namespace where the job runs (for pod lookup) LookupImage func(ctx context.Context) (*ImageLookupResult, error) ResolveRuntimeConfig func(ctx context.Context) (*RuntimeConfigResolution, error) OnRuntimeConfigResolved func(resolution *RuntimeConfigResolution) GetImagePullSecrets func() []corev1.LocalObjectReference // Template's imagePullSecrets GetServiceAccountName func() string // Template's serviceAccountName GetTemplateCaches func(ctx context.Context) (*aimv1alpha1.AIMTemplateCacheList, error) }
TemplateObservationOptions configures ObserveTemplate behaviour.
type TemplatePlanBuilders ¶
type TemplatePlanBuilders struct {
BuildRuntime func(input TemplatePlanInput) client.Object
BuildDiscoveryJob func(input TemplatePlanInput) client.Object
}
TemplatePlanBuilders specifies how to render runtime and discovery job objects.
type TemplatePlanContext ¶
type TemplatePlanContext struct {
Ctx context.Context
Client client.Client
Template metav1.Object
APIVersion string
Kind string
Status aimv1alpha1.AIMTemplateStatusEnum
Observation *TemplateObservation
}
TemplatePlanContext provides metadata needed during plan generation.
type TemplatePlanInput ¶
type TemplatePlanInput struct {
Observation *TemplateObservation
RuntimeConfigSpec aimv1alpha1.AIMRuntimeConfigSpec
OwnerReference metav1.OwnerReference
}
TemplatePlanInput supplies builders with convenient access to observation data.
type TemplateResolution ¶
type TemplateResolution struct {
BaseName string
FinalName string
Derived bool
Scope TemplateScope
}
TemplateResolution captures the result of resolving a template name for a service.
type TemplateScope ¶
type TemplateScope string
TemplateScope indicates whether a template is namespace-scoped, cluster-scoped, or unresolved.
const ( TemplateScopeNone TemplateScope = "" TemplateScopeNamespace TemplateScope = "namespace" TemplateScopeCluster TemplateScope = "cluster" )
func LoadBaseTemplateSpec ¶
func LoadBaseTemplateSpec(ctx context.Context, k8sClient client.Client, service *aimv1alpha1.AIMService, baseName string) (*aimv1alpha1.AIMServiceTemplateSpec, TemplateScope, error)
LoadBaseTemplateSpec fetches the base template spec for a derived template. Searches namespace-scoped templates first, then falls back to cluster-scoped templates.
func ResolveOrCreateModelFromImage ¶
func ResolveOrCreateModelFromImage( ctx context.Context, k8sClient client.Client, serviceNamespace string, imageURI string, runtimeConfig *aimv1alpha1.AIMRuntimeConfigSpec, imagePullSecrets []corev1.LocalObjectReference, serviceAccountName string, ) (modelName string, scope TemplateScope, err error)
ResolveOrCreateModelFromImage searches for existing models matching the image URI, or creates a new one if none exists. Returns the model name and scope.
type TemplateSelectionStatus ¶
type TemplateSelectionStatus struct {
AutoSelected bool
CandidateCount int
SelectionReason string
SelectionMessage string
TemplatesExistButNotReady bool
ImageReady bool
ImageReadyReason string
ImageReadyMessage string
ModelResolutionErr error
}
TemplateSelectionStatus captures metadata about automatic template selection.
type TemplateSpec ¶
type TemplateSpec interface {
GetModelName() string
GetSpecModelSources() []aimv1alpha1.AIMModelSource
}
TemplateSpec provides the common template specification.
type TemplateWithStatus ¶
type TemplateWithStatus interface {
TemplateSpec
client.Object
GetStatus() *aimv1alpha1.AIMServiceTemplateStatus
}
TemplateWithStatus extends TemplateSpec with status access.