Documentation
¶
Index ¶
- Constants
- Variables
- func ApplyHeadroomAndRound(baseSizeBytes int64, headroomPercent int32) int64
- func BuildClusterServingRuntime(template aimstate.TemplateState, ownerRef metav1.OwnerReference) *servingv1alpha1.ClusterServingRuntime
- func BuildDerivedTemplate(service *aimv1alpha1.AIMService, templateName string, resolvedModelName string, ...) *aimv1alpha1.AIMServiceTemplate
- func BuildDiscoveryJob(spec DiscoveryJobSpec) *batchv1.Job
- func BuildInferenceService(serviceState aimstate.ServiceState, ownerRef metav1.OwnerReference) *servingv1beta1.InferenceService
- func BuildInferenceServiceHTTPRoute(serviceState aimstate.ServiceState, ownerRef metav1.OwnerReference) *gatewayapiv1.HTTPRoute
- func BuildServingRuntime(template aimstate.TemplateState, ownerRef metav1.OwnerReference) *servingv1alpha1.ServingRuntime
- func BuildServingRuntimeFromState(state aimstate.TemplateState, ownerRef metav1.OwnerReference) *servingv1alpha1.ServingRuntime
- func BuildTemplateStateFromObservation(name, namespace string, specCommon aimv1alpha1.AIMServiceTemplateSpecCommon, ...) aimstate.TemplateState
- func CountActiveDiscoveryJobs(ctx context.Context, k8sClient client.Client) (int, error)
- func DefaultRoutePath(service *aimv1alpha1.AIMService) string
- func DerivedTemplateName(baseName, suffix string) string
- func EvaluateHTTPRouteStatus(route *gatewayapiv1.HTTPRoute) (bool, string, string)
- func EvaluateInferenceServiceStatus(status *aimv1alpha1.AIMServiceStatus, obs *ServiceObservation, ...)
- func EvaluateRoutingStatus(service *aimv1alpha1.AIMService, obs *ServiceObservation, ...) (enabled bool, ready bool, hasFatalError bool)
- func FormatRuntimeConfigSources(resolution *RuntimeConfigResolution, namespaceLabel string) []string
- func GenerateInferenceServiceName(serviceName, namespace string) string
- func GetAMDDeviceIDsForModel(modelName string) []string
- func GetClusterGPUResources(ctx context.Context, k8sClient client.Client) (map[string]GPUResourceInfo, error)
- func GetClusterServingRuntime(ctx context.Context, k8sClient client.Client, name string) (*servingv1alpha1.ClusterServingRuntime, error)
- func GetDiscoveryJob(ctx context.Context, k8sClient client.Client, namespace, templateName string) (*batchv1.Job, error)
- func GetImageConfigLabels(ctx context.Context, imageURI string, keychain authn.Keychain) (map[string]string, error)
- func GetOperatorNamespace() string
- func GetPVCHeadroomPercent(spec aimv1alpha1.AIMRuntimeConfigSpec) int32
- func GetServingRuntime(ctx context.Context, k8sClient client.Client, namespace, name string) (*servingv1alpha1.ServingRuntime, error)
- func HandleImageMissing(status *aimv1alpha1.AIMServiceStatus, obs *ServiceObservation, ...) bool
- func HandleImageNotReady(status *aimv1alpha1.AIMServiceStatus, obs *ServiceObservation, ...) bool
- func HandleInferenceServicePodImageError(status *aimv1alpha1.AIMServiceStatus, obs *ServiceObservation, ...) bool
- func HandleMissingModelSource(status *aimv1alpha1.AIMServiceStatus, obs *ServiceObservation, ...) bool
- func HandleModelCacheReadiness(service *aimv1alpha1.AIMService, status *aimv1alpha1.AIMServiceStatus, ...) bool
- func HandleModelResolutionFailure(status *aimv1alpha1.AIMServiceStatus, obs *ServiceObservation, ...) bool
- func HandlePathTemplateError(status *aimv1alpha1.AIMServiceStatus, service *aimv1alpha1.AIMService, ...) bool
- func HandleReconcileErrors(status *aimv1alpha1.AIMServiceStatus, ...) bool
- func HandleRuntimeConfigMissing(status *aimv1alpha1.AIMServiceStatus, obs *ServiceObservation, ...) bool
- func HandleTemplateDegraded(status *aimv1alpha1.AIMServiceStatus, obs *ServiceObservation, ...) bool
- func HandleTemplateNotAvailable(status *aimv1alpha1.AIMServiceStatus, obs *ServiceObservation, ...) bool
- func HandleTemplateSelectionFailure(status *aimv1alpha1.AIMServiceStatus, obs *ServiceObservation, ...) bool
- func HasOwnerReference(refs []metav1.OwnerReference, uid types.UID) bool
- func InferenceServiceRouteName(serviceName string) string
- func InspectImage(ctx context.Context, imageURI string, ...) (*aimv1alpha1.ImageMetadata, error)
- func IsDerivedTemplate(labels map[string]string) bool
- func IsGPUAvailable(ctx context.Context, k8sClient client.Client, gpuModel string) (bool, error)
- func IsJobComplete(job *batchv1.Job) bool
- func IsJobFailed(job *batchv1.Job) bool
- func IsJobSucceeded(job *batchv1.Job) bool
- func JoinRuntimeConfigSources(resolution *RuntimeConfigResolution, namespaceLabel string) string
- func ListAvailableGPUs(ctx context.Context, k8sClient client.Client) ([]string, error)
- func NodeGPUChangePredicate() predicate.Predicate
- func NormalizeRuntimeConfigName(name string) string
- func ObserveDerivedTemplate(ctx context.Context, k8sClient client.Client, service *aimv1alpha1.AIMService, ...) error
- func ObserveNonDerivedTemplate(ctx context.Context, k8sClient client.Client, service *aimv1alpha1.AIMService, ...) error
- func OverridesSuffix(overrides *aimv1alpha1.AIMServiceOverrides) string
- func PlanImageResources(ctx context.Context, input ImagePlanInput) ([]client.Object, *aimv1alpha1.ImageMetadata, error)
- func PlanTemplateResources(ctx TemplatePlanContext, builders TemplatePlanBuilders) ([]client.Object, bool)
- func PopulateObservationFromClusterTemplate(ctx context.Context, k8sClient client.Client, service *aimv1alpha1.AIMService, ...) error
- func PopulateObservationFromNamespaceTemplate(ctx context.Context, k8sClient client.Client, service *aimv1alpha1.AIMService, ...) error
- func ProjectImageStatus(status *aimv1alpha1.AIMModelStatus, spec aimv1alpha1.AIMModelSpec, ...)
- func ProjectServiceStatus(service *aimv1alpha1.AIMService, obs *ServiceObservation, ...)
- func ProjectTemplateStatus(ctx context.Context, k8sClient client.Client, clientset kubernetes.Interface, ...) error
- func PropagateLabels(parent, child client.Object, config *aimv1alpha1.AIMRuntimeConfigCommon)
- func QuantityWithHeadroom(baseSizeBytes int64, headroomPercent int32) resource.Quantity
- func RequestsForServices(services []aimv1alpha1.AIMService) []reconcile.Request
- func ResolveServiceRoutePath(service *aimv1alpha1.AIMService, ...) (string, error)
- func ResolveServiceRouteTimeout(service *aimv1alpha1.AIMService, ...) *string
- func ResolveStorageClass(explicitStorageClass string, ...) string
- func ResolveTemplateNameForService(ctx context.Context, k8sClient client.Client, service *aimv1alpha1.AIMService) (TemplateResolution, TemplateSelectionStatus, error)
- func RuntimeConfigNameForService(service *aimv1alpha1.AIMService, ...) string
- func SanitizeLabelValue(s string) string
- func SelectBestTemplate(candidates []TemplateCandidate, overrides *aimv1alpha1.AIMServiceOverrides, ...) (*TemplateCandidate, int, SelectionDiagnostics, []CandidateEvaluation)
- func SetCondition(conditions *[]metav1.Condition, newCondition metav1.Condition)
- func TemplateNameFromSpec(service *aimv1alpha1.AIMService) string
- func TemplateRequiresGPU(spec aimv1alpha1.AIMServiceTemplateSpecCommon) bool
- func UpdateTemplateGPUAvailability(ctx context.Context, k8sClient client.Client, ...) error
- type CandidateEvaluation
- type DiscoveryJobSpec
- type GPUResourceInfo
- type ImageLookupResult
- type ImageObservation
- type ImageObservationOptions
- type ImagePlanInput
- type ImagePullError
- type ImagePullErrorType
- type ImageRegistryError
- type MetadataFormatError
- type ModelReference
- type ParsedDiscovery
- type RuntimeConfigResolution
- type RuntimeObservation
- type SelectionDiagnostics
- type ServiceObservation
- type TemplateCandidate
- type TemplateObservation
- type TemplateObservationOptions
- type TemplatePlanBuilders
- type TemplatePlanContext
- type TemplatePlanInput
- type TemplateResolution
- type TemplateScope
- func LoadBaseTemplateSpec(ctx context.Context, k8sClient client.Client, service *aimv1alpha1.AIMService, ...) (*aimv1alpha1.AIMServiceTemplateSpec, TemplateScope, error)
- func ResolveOrCreateModelFromImage(ctx context.Context, k8sClient client.Client, serviceNamespace string, ...) (modelName string, scope TemplateScope, err error)
- type TemplateSelectionStatus
- type TemplateSpec
- type TemplateWithStatus
Constants ¶
const ( // DefaultRuntimeConfigName is the name of the default AIM runtime config DefaultRuntimeConfigName = "default" // MaxConcurrentDiscoveryJobs is the global limit for concurrent discovery jobs across all namespaces MaxConcurrentDiscoveryJobs = 10 // DefaultPVCHeadroomPercent is the default percentage of extra space to add to PVCs // for model storage. This accounts for filesystem overhead and temporary files during // model loading. The value represents a percentage (e.g., 10 means 10% extra space). DefaultPVCHeadroomPercent = 10 // AimLabelDomain is the base domain used for AIM-specific labels. AimLabelDomain = "aim.silogen.ai" // AIM label keys. LabelKeyTemplate = AimLabelDomain + "/template" LabelKeyModelID = AimLabelDomain + "/model-id" LabelKeyDerivedTemplate = AimLabelDomain + "/derived-template" LabelKeyAutoGenerated = AimLabelDomain + "/auto-generated" LabelKeyImageName = AimLabelDomain + "/aim-image" LabelKeyMetric = AimLabelDomain + "/template.metric" LabelKeyPrecision = AimLabelDomain + "/template.precision" LabelKeyServiceName = AimLabelDomain + "/service-name" LabelKeyModelCache = AimLabelDomain + "/modelcache" LabelKeyTemplateCache = AimLabelDomain + "/template-cache" LabelKeyCacheType = AimLabelDomain + "/cache-type" LabelKeySourceModel = AimLabelDomain + "/source-model" // AIM label values. LabelValueRuntimeName = "aim-runtime" LabelValueRuntimeComponent = "serving-runtime" LabelValueManagedBy = "aim-controller" LabelValueDiscoveryName = "aim-discovery" LabelValueDiscoveryComponent = "discovery-job" LabelValueServiceName = "aim-service" LabelValueServiceComponent = "inference-service" LabelValueDerivedTemplate = "true" LabelValueAutoGenerated = "true" // Cache type label values. LabelValueCacheTypeTemplateCache = "template-cache" LabelValueCacheTypeTempService = "temporary-service-cache" // NodeLabelAMDGPUDeviceID is the primary node label for AMD GPU device IDs (e.g., "74a1" for MI300X) NodeLabelAMDGPUDeviceID = "amd.com/gpu.device-id" )
const ( // DiscoveryJobBackoffLimit is the number of retries before marking the discovery job as failed DiscoveryJobBackoffLimit = 3 // DiscoveryJobTTLSeconds defines how long completed discovery jobs persist // before automatic cleanup. This allows time for status inspection and log retrieval. DiscoveryJobTTLSeconds = 60 )
const ( // DefaultGPUResourceName is the default resource name for AMD GPUs in Kubernetes DefaultGPUResourceName = "amd.com/gpu" // DefaultSharedMemorySize is the default shared memory size. // Shared memory is required for efficient inter-process communication in model serving workloads. DefaultSharedMemorySize = "8Gi" // KubernetesLabelValueMaxLength is the maximum length for a Kubernetes label value KubernetesLabelValueMaxLength = 63 )
const ( // LabelAutoCreated marks models that were automatically created from service image references LabelAutoCreated = "aim.silogen.ai/auto-created" LabelKeyModelSource = "aim.silogen.ai/model-source" )
const ( // MaxRoutePathLength is the maximum allowed length for a route path. // This prevents excessively long paths that could cause issues with gateways or proxies. MaxRoutePathLength = 200 )
Variables ¶
var ErrImageNotFound = errors.New("image not found in catalog")
ErrImageNotFound is returned when an image is not found in the catalog
var ( // ErrMultipleModelsFound is returned when multiple models exist with the same image URI ErrMultipleModelsFound = errors.New("multiple models found with the same image") )
var ErrRuntimeConfigNotFound = errors.New("runtime config not found")
ErrRuntimeConfigNotFound indicates that neither namespace nor cluster runtime config could be located.
var GPUPreferenceOrder = []string{
"MI325X",
"MI300X",
"MI250X",
"MI210",
"A100",
"H100",
}
GPUPreferenceOrder defines the preference order for GPU models when selecting templates. GPUs earlier in the list are preferred over later ones. TODO: Fill in the complete preference order based on performance characteristics.
var KnownAmdGpuDevices = map[string]string{
"738c": "MI100",
"738e": "MI100",
"7408": "MI250X",
"740c": "MI250X",
"740f": "MI210",
"7410": "MI210",
"74a0": "MI300A",
"74a1": "MI300X",
"74a2": "MI308X",
"74a5": "MI325X",
"74a8": "MI308X",
"74a9": "MI300X",
"74b5": "MI300X",
"74b6": "MI308X",
"74b9": "MI325X",
"74bd": "MI300X",
"75a0": "MI350X",
"75a3": "MI355X",
"75b0": "MI350X",
"75b3": "MI355X",
"7460": "V710",
"7461": "V710",
"7448": "W7900",
"744a": "W7900",
"7449": "W7800",
"745e": "W7800",
"73a2": "W6900X",
"73a3": "W6800",
"73ab": "W6800X",
"73a1": "V620",
"73ae": "V620",
"7550": "RX9070",
"744c": "RX7900",
"73af": "RX6900",
"73bf": "RX6800",
}
var MetricPreferenceOrder = []string{
"latency",
"throughput",
}
MetricPreferenceOrder defines preference for optimization metrics. "latency" is preferred over "throughput" by default.
var PrecisionPreferenceOrder = []string{
"fp8",
"fp16",
"bf16",
"fp32",
}
PrecisionPreferenceOrder defines preference for precision levels. Lower precision (more optimized) is preferred.
var ProfileTypePreferenceOrder = []string{ string(aimv1alpha1.AIMProfileTypeOptimized), string(aimv1alpha1.AIMProfileTypePreview), string(aimv1alpha1.AIMProfileTypeUnoptimized), }
Functions ¶
func ApplyHeadroomAndRound ¶
ApplyHeadroomAndRound applies headroom percentage to a base size and rounds up to the nearest Gi. This ensures PVC sizes are clean, human-readable values (e.g., "421Gi" instead of "451936812032").
Parameters:
- baseSizeBytes: The original size in bytes
- headroomPercent: Percentage of extra space to add (0-100, e.g., 10 means 10% extra)
Returns:
- The final size in bytes, rounded up to the nearest Gi boundary
Example:
- Input: 9,094,593,249 bytes with 10% headroom
- With headroom: 10,004,052,573 bytes (9.31 Gi)
- Rounded: 10,737,418,240 bytes (10 Gi)
func BuildClusterServingRuntime ¶
func BuildClusterServingRuntime(template aimstate.TemplateState, ownerRef metav1.OwnerReference) *servingv1alpha1.ClusterServingRuntime
BuildClusterServingRuntime creates a KServe ClusterServingRuntime for a cluster-scoped template.
func BuildDerivedTemplate ¶
func BuildDerivedTemplate( service *aimv1alpha1.AIMService, templateName string, resolvedModelName string, baseSpec *aimv1alpha1.AIMServiceTemplateSpec, ) *aimv1alpha1.AIMServiceTemplate
BuildDerivedTemplate constructs an AIMServiceTemplate for a service with overrides. The template inherits from the base spec and applies service-specific customizations.
func BuildDiscoveryJob ¶
func BuildDiscoveryJob(spec DiscoveryJobSpec) *batchv1.Job
BuildDiscoveryJob creates a Job that runs a model discovery dry-run.
func BuildInferenceService ¶
func BuildInferenceService(serviceState aimstate.ServiceState, ownerRef metav1.OwnerReference) *servingv1beta1.InferenceService
BuildInferenceService constructs a KServe InferenceService referencing a ServingRuntime or ClusterServingRuntime.
func BuildInferenceServiceHTTPRoute ¶
func BuildInferenceServiceHTTPRoute(serviceState aimstate.ServiceState, ownerRef metav1.OwnerReference) *gatewayapiv1.HTTPRoute
BuildInferenceServiceHTTPRoute creates an HTTPRoute that exposes the predictor service via the provided gateway parent.
func BuildServingRuntime ¶
func BuildServingRuntime(template aimstate.TemplateState, ownerRef metav1.OwnerReference) *servingv1alpha1.ServingRuntime
BuildServingRuntime creates a KServe ServingRuntime for a namespace-scoped template.
func BuildServingRuntimeFromState ¶
func BuildServingRuntimeFromState(state aimstate.TemplateState, ownerRef metav1.OwnerReference) *servingv1alpha1.ServingRuntime
BuildServingRuntimeFromState constructs a namespaced ServingRuntime from a TemplateState snapshot. This is an adapter function that maintains compatibility with the original signature.
func BuildTemplateStateFromObservation ¶
func BuildTemplateStateFromObservation( name, namespace string, specCommon aimv1alpha1.AIMServiceTemplateSpecCommon, observation *TemplateObservation, runtimeConfigSpec aimv1alpha1.AIMRuntimeConfigSpec, status *aimv1alpha1.AIMServiceTemplateStatus, ) aimstate.TemplateState
BuildTemplateStateFromObservation constructs a TemplateState from the template specification, observation, and status. This is an adapter function that combines template metadata with observed resources.
func CountActiveDiscoveryJobs ¶
CountActiveDiscoveryJobs counts the number of active (non-complete) discovery jobs across all namespaces. A job is considered active if it exists and is not in a complete state (succeeded or failed).
func DefaultRoutePath ¶
func DefaultRoutePath(service *aimv1alpha1.AIMService) string
DefaultRoutePath returns the default HTTP route prefix.
func DerivedTemplateName ¶
DerivedTemplateName constructs a template name from a base name and suffix. Ensures the final name does not exceed Kubernetes name length limits.
func EvaluateHTTPRouteStatus ¶
func EvaluateHTTPRouteStatus(route *gatewayapiv1.HTTPRoute) (bool, string, string)
EvaluateHTTPRouteStatus checks the HTTPRoute status and returns readiness state.
func EvaluateInferenceServiceStatus ¶
func EvaluateInferenceServiceStatus( status *aimv1alpha1.AIMServiceStatus, obs *ServiceObservation, inferenceService *servingv1beta1.InferenceService, httpRoute *gatewayapiv1.HTTPRoute, routingEnabled bool, routingReady bool, setCondition func(conditionType string, conditionStatus metav1.ConditionStatus, reason, message string), )
EvaluateInferenceServiceStatus checks InferenceService and routing readiness. Updates status conditions based on the InferenceService and routing state.
func EvaluateRoutingStatus ¶
func EvaluateRoutingStatus( service *aimv1alpha1.AIMService, obs *ServiceObservation, status *aimv1alpha1.AIMServiceStatus, setCondition func(conditionType string, conditionStatus metav1.ConditionStatus, reason, message string), ) (enabled bool, ready bool, hasFatalError bool)
EvaluateRoutingStatus checks routing configuration and updates status accordingly. Returns (enabled, ready, hasFatalError) to indicate if routing is enabled, if it's ready, and if there's a terminal error.
func FormatRuntimeConfigSources ¶
func FormatRuntimeConfigSources(resolution *RuntimeConfigResolution, namespaceLabel string) []string
FormatRuntimeConfigSources renders a human-readable list of runtime config sources for logging/events.
func GenerateInferenceServiceName ¶
GenerateInferenceServiceName creates a KServe InferenceService name that fits DNS label constraints. KServe creates hostnames in the format: {isvc-name}-predictor-{namespace} These hostnames must be ≤ 63 characters to comply with DNS label limits.
If the original name would exceed the limit, this function: 1. Truncates the base name 2. Appends an 8-character hash of the full original name 3. Ensures the result is RFC1123 compliant
The hash ensures uniqueness while keeping names deterministic and short.
func GetAMDDeviceIDsForModel ¶
GetAMDDeviceIDsForModel returns all AMD device IDs that map to a given GPU model name. This is the inverse of mapAMDDeviceIDToModel, allowing lookup of all device IDs for a model. Example: GetAMDDeviceIDsForModel("MI300X") returns ["74a1", "74a9", "74b5", "74bd"] Returns empty slice if the model is not found or is not an AMD GPU.
func GetClusterGPUResources ¶
func GetClusterGPUResources(ctx context.Context, k8sClient client.Client) (map[string]GPUResourceInfo, error)
GetClusterGPUResources returns an aggregated view of all GPU resources in the cluster. It scans all nodes and aggregates resources that start with "amd.com/" or "nvidia.com/". Returns a map where keys are GPU models (e.g., "MI300X", "A100") extracted from node labels, and values contain the resource name.
func GetClusterServingRuntime ¶
func GetClusterServingRuntime(ctx context.Context, k8sClient client.Client, name string) (*servingv1alpha1.ClusterServingRuntime, error)
GetClusterServingRuntime fetches a ClusterServingRuntime by name
func GetDiscoveryJob ¶
func GetDiscoveryJob(ctx context.Context, k8sClient client.Client, namespace, templateName string) (*batchv1.Job, error)
GetDiscoveryJob fetches the discovery job for a template. Returns the newest job (by CreationTimestamp) if multiple exist.
func GetImageConfigLabels ¶
func GetImageConfigLabels(ctx context.Context, imageURI string, keychain authn.Keychain) (map[string]string, error)
GetImageConfigLabels is a helper function that retrieves just the labels from an image without parsing them into structured metadata. Useful for debugging.
func GetOperatorNamespace ¶
func GetOperatorNamespace() string
GetOperatorNamespace returns the namespace where the AIM operator runs. It reads the AIM_OPERATOR_NAMESPACE environment variable; if unset, it defaults to "kaiwo-system".
func GetPVCHeadroomPercent ¶
func GetPVCHeadroomPercent(spec aimv1alpha1.AIMRuntimeConfigSpec) int32
GetPVCHeadroomPercent returns the PVC headroom percentage from the runtime config spec. If not set, returns the default value defined in DefaultPVCHeadroomPercent.
func GetServingRuntime ¶
func GetServingRuntime(ctx context.Context, k8sClient client.Client, namespace, name string) (*servingv1alpha1.ServingRuntime, error)
GetServingRuntime fetches a ServingRuntime by namespace and name
func HandleImageMissing ¶
func HandleImageMissing( status *aimv1alpha1.AIMServiceStatus, obs *ServiceObservation, setCondition func(conditionType string, conditionStatus metav1.ConditionStatus, reason, message string), ) bool
HandleImageMissing checks for missing image and updates status. Returns true if the image is missing.
func HandleImageNotReady ¶
func HandleImageNotReady( status *aimv1alpha1.AIMServiceStatus, obs *ServiceObservation, setCondition func(conditionType string, conditionStatus metav1.ConditionStatus, reason, message string), ) bool
HandleImageNotReady checks if the resolved image is not yet ready and updates status. Returns true if the service should wait for the image to become ready.
func HandleInferenceServicePodImageError ¶
func HandleInferenceServicePodImageError( status *aimv1alpha1.AIMServiceStatus, obs *ServiceObservation, setCondition func(conditionType string, conditionStatus metav1.ConditionStatus, reason, message string), ) bool
HandleInferenceServicePodImageError checks for image pull errors in InferenceService pods. Returns true if an image pull error was detected.
func HandleMissingModelSource ¶
func HandleMissingModelSource( status *aimv1alpha1.AIMServiceStatus, obs *ServiceObservation, setCondition func(conditionType string, conditionStatus metav1.ConditionStatus, reason, message string), ) bool
HandleMissingModelSource checks if the template is available but has no model sources. Returns true if model sources are missing (discovery succeeded but produced no usable sources).
func HandleModelCacheReadiness ¶
func HandleModelCacheReadiness(service *aimv1alpha1.AIMService, status *aimv1alpha1.AIMServiceStatus, obs *ServiceObservation, setCondition func(conditionType string, conditionStatus metav1.ConditionStatus, reason, message string)) bool
func HandleModelResolutionFailure ¶
func HandleModelResolutionFailure( status *aimv1alpha1.AIMServiceStatus, obs *ServiceObservation, setCondition func(conditionType string, conditionStatus metav1.ConditionStatus, reason, message string), ) bool
HandleModelResolutionFailure checks for model resolution failures and updates status. Returns true if model resolution failed.
func HandlePathTemplateError ¶
func HandlePathTemplateError( status *aimv1alpha1.AIMServiceStatus, service *aimv1alpha1.AIMService, obs *ServiceObservation, setCondition func(conditionType string, conditionStatus metav1.ConditionStatus, reason, message string), ) bool
HandlePathTemplateError checks for path template errors and updates status. Returns true if there is a path template error. This can occur when routing is enabled (via service spec or runtime config) but the path template is invalid.
func HandleReconcileErrors ¶
func HandleReconcileErrors( status *aimv1alpha1.AIMServiceStatus, setCondition func(conditionType string, conditionStatus metav1.ConditionStatus, reason, message string), errs controllerutils.ReconcileErrors, ) bool
HandleReconcileErrors processes reconciliation errors and updates service status. Returns true if errors were found and handled.
func HandleRuntimeConfigMissing ¶
func HandleRuntimeConfigMissing( status *aimv1alpha1.AIMServiceStatus, obs *ServiceObservation, setCondition func(conditionType string, conditionStatus metav1.ConditionStatus, reason, message string), ) bool
HandleRuntimeConfigMissing checks for missing runtime config and updates status. Returns true if the runtime config is missing.
func HandleTemplateDegraded ¶
func HandleTemplateDegraded( status *aimv1alpha1.AIMServiceStatus, obs *ServiceObservation, setCondition func(conditionType string, conditionStatus metav1.ConditionStatus, reason, message string), ) bool
HandleTemplateDegraded checks if the template is degraded, not available, or failed and updates status. Returns true if the template is degraded, not available, or failed.
func HandleTemplateNotAvailable ¶
func HandleTemplateNotAvailable( status *aimv1alpha1.AIMServiceStatus, obs *ServiceObservation, setCondition func(conditionType string, conditionStatus metav1.ConditionStatus, reason, message string), ) bool
HandleTemplateNotAvailable checks if the template is not available and updates status. Returns true if the template is not yet available (Pending or Progressing). Sets the service to Pending state because it's waiting for a dependency (the template).
func HandleTemplateSelectionFailure ¶
func HandleTemplateSelectionFailure( status *aimv1alpha1.AIMServiceStatus, obs *ServiceObservation, setCondition func(conditionType string, conditionStatus metav1.ConditionStatus, reason, message string), ) bool
HandleTemplateSelectionFailure reports failures during automatic template selection.
func HasOwnerReference ¶
func HasOwnerReference(refs []metav1.OwnerReference, uid types.UID) bool
HasOwnerReference checks if the given UID exists in the owner references list.
func InferenceServiceRouteName ¶
InferenceServiceRouteName returns the canonical HTTPRoute name for an InferenceService.
func InspectImage ¶
func InspectImage( ctx context.Context, imageURI string, imagePullSecrets []corev1.LocalObjectReference, clientset kubernetes.Interface, namespace string, ) (*aimv1alpha1.ImageMetadata, error)
InspectImage extracts metadata from a container image using the provided image pull secrets. It uses go-containerregistry to authenticate and fetch image labels, then parses them into the ImageMetadata structure.
Parameters:
- ctx: Context for the operation
- imageURI: Full container image reference (e.g., "registry.example.com/repo/image:tag")
- imagePullSecrets: Kubernetes image pull secrets for authentication
- clientset: Kubernetes clientset for accessing secrets
- namespace: Namespace where the secrets are located
Returns:
- *ImageMetadata: Extracted metadata if successful
- error: Any error encountered during inspection (authentication, network, parsing, etc.). Registry access errors are wrapped in ImageRegistryError for categorization.
func IsDerivedTemplate ¶
IsDerivedTemplate returns true when the provided labels indicate a controller-managed derived template.
func IsGPUAvailable ¶
IsGPUAvailable checks if a specific GPU model is available in the cluster. The gpuModel parameter should be the GPU model name (e.g., "MI300X", "A100"), not the resource name. The input is normalized to handle variants like "MI300X (rev 2)" or "Instinct MI300X".
func IsJobComplete ¶
IsJobComplete returns true if the job has completed (successfully or failed)
func IsJobFailed ¶
IsJobFailed returns true if the job failed
func IsJobSucceeded ¶
IsJobSucceeded returns true if the job completed successfully
func JoinRuntimeConfigSources ¶
func JoinRuntimeConfigSources(resolution *RuntimeConfigResolution, namespaceLabel string) string
JoinRuntimeConfigSources joins runtime config sources for concise logging.
func ListAvailableGPUs ¶
ListAvailableGPUs returns a list of all GPU resource types available in the cluster.
func NodeGPUChangePredicate ¶
NodeGPUChangePredicate returns a predicate that triggers reconciles when GPU-related node attributes change.
func NormalizeRuntimeConfigName ¶
NormalizeRuntimeConfigName returns the effective name to use for lookups when the user omits the field.
func ObserveDerivedTemplate ¶
func ObserveDerivedTemplate( ctx context.Context, k8sClient client.Client, service *aimv1alpha1.AIMService, resolution TemplateResolution, obs *ServiceObservation, ) error
ObserveDerivedTemplate handles observation for services with derived templates. It fetches the derived template if it exists, or loads the base template spec for creation.
func ObserveNonDerivedTemplate ¶
func ObserveNonDerivedTemplate( ctx context.Context, k8sClient client.Client, service *aimv1alpha1.AIMService, templateName string, preferredScope TemplateScope, obs *ServiceObservation, ) error
ObserveNonDerivedTemplate handles observation for services with non-derived templates. It searches for namespace-scoped templates first, then falls back to cluster-scoped templates. Does not set ShouldCreateTemplate - that decision is made in the controller based on whether an explicit templateRef was provided.
func OverridesSuffix ¶
func OverridesSuffix(overrides *aimv1alpha1.AIMServiceOverrides) string
OverridesSuffix computes a hash suffix for service overrides.
func PlanImageResources ¶
func PlanImageResources(ctx context.Context, input ImagePlanInput) ([]client.Object, *aimv1alpha1.ImageMetadata, error)
PlanImageResources plans the desired state for an image resource. It performs metadata extraction if needed and creates ServiceTemplates based on recommendedDeployments.
func PlanTemplateResources ¶
func PlanTemplateResources(ctx TemplatePlanContext, builders TemplatePlanBuilders) ([]client.Object, bool)
PlanTemplateResources produces desired objects based on the observation and controller-provided builders. It respects the global limit on concurrent discovery jobs (MaxConcurrentDiscoveryJobs). Returns the desired objects and a boolean indicating if a requeue is needed (when job limit is reached).
func PopulateObservationFromClusterTemplate ¶
func PopulateObservationFromClusterTemplate( ctx context.Context, k8sClient client.Client, service *aimv1alpha1.AIMService, template *aimv1alpha1.AIMClusterServiceTemplate, obs *ServiceObservation, ) error
PopulateObservationFromClusterTemplate extracts data from a cluster-scoped template into the observation.
func PopulateObservationFromNamespaceTemplate ¶
func PopulateObservationFromNamespaceTemplate( ctx context.Context, k8sClient client.Client, service *aimv1alpha1.AIMService, template *aimv1alpha1.AIMServiceTemplate, obs *ServiceObservation, ) error
PopulateObservationFromNamespaceTemplate extracts data from a namespace-scoped template into the observation.
func ProjectImageStatus ¶
func ProjectImageStatus( status *aimv1alpha1.AIMModelStatus, spec aimv1alpha1.AIMModelSpec, observation *ImageObservation, extractedMetadata *aimv1alpha1.ImageMetadata, extractionErr error, observedGeneration int64, )
ProjectImageStatus updates the status of an image resource based on observation and errors.
func ProjectServiceStatus ¶
func ProjectServiceStatus( service *aimv1alpha1.AIMService, obs *ServiceObservation, inferenceService *servingv1beta1.InferenceService, httpRoute *gatewayapiv1.HTTPRoute, errs controllerutils.ReconcileErrors, )
ProjectServiceStatus computes and updates the service status based on observations and errors. This is a high-level orchestrator that calls the individual status handler functions.
func ProjectTemplateStatus ¶
func ProjectTemplateStatus( ctx context.Context, k8sClient client.Client, clientset kubernetes.Interface, recorder record.EventRecorder, template TemplateWithStatus, obs *TemplateObservation, errs controllerutils.ReconcileErrors, imageNotFoundMessage string, ) error
ProjectTemplateStatus computes status from observation and errors. This is shared between cluster- and namespace-scoped template controllers. Modifies the template's status directly and emits events for discovery phase changes.
func PropagateLabels ¶
func PropagateLabels(parent, child client.Object, config *aimv1alpha1.AIMRuntimeConfigCommon)
PropagateLabels propagates labels from a parent resource to a child resource based on the runtime config's label propagation settings. Only labels whose keys match the patterns defined in the config are copied. The child's existing labels are preserved and only new labels are added.
Parameters:
- parent: The source resource whose labels should be propagated
- child: The target resource that will receive the propagated labels
- config: The runtime config common spec containing label propagation settings
The function does nothing if:
- Label propagation is not enabled in the config
- The config is nil or has no label propagation settings
- The parent has no labels
Special handling for Jobs: Labels are also propagated to the PodTemplateSpec.
func QuantityWithHeadroom ¶
QuantityWithHeadroom creates a resource.Quantity with headroom applied and rounded to the nearest Gi. This is a convenience wrapper around ApplyHeadroomAndRound that returns a Kubernetes Quantity.
The returned Quantity uses BinarySI format (Ki, Mi, Gi, Ti suffixes) for compatibility with Kubernetes storage resources.
Parameters:
- baseSizeBytes: The original size in bytes
- headroomPercent: Percentage of extra space to add (0-100)
Returns:
- A resource.Quantity representing the size with headroom, formatted cleanly
func RequestsForServices ¶
func RequestsForServices(services []aimv1alpha1.AIMService) []reconcile.Request
RequestsForServices converts a list of AIMServices to reconcile requests.
func ResolveServiceRoutePath ¶
func ResolveServiceRoutePath(service *aimv1alpha1.AIMService, runtimeConfig aimv1alpha1.AIMRuntimeConfigSpec) (string, error)
ResolveServiceRoutePath renders the HTTP route prefix using service and runtime config context. The precedence order is:
1. Service.Spec.Routing.PathTemplate (highest priority)
2. RuntimeConfig.Routing.PathTemplate (base layer)
func ResolveServiceRouteTimeout ¶
func ResolveServiceRouteTimeout(service *aimv1alpha1.AIMService, runtimeConfig aimv1alpha1.AIMRuntimeConfigSpec) *string
ResolveServiceRouteTimeout resolves the HTTP route timeout using service and runtime config context. The precedence order is:
1. Service.Spec.Routing.RequestTimeout (highest priority)
2. RuntimeConfig.Routing.RequestTimeout (base layer)
Returns nil if no timeout is configured at any level.
func ResolveStorageClass ¶
func ResolveStorageClass(explicitStorageClass string, runtimeConfigSpec aimv1alpha1.AIMRuntimeConfigSpec) string
ResolveStorageClass determines the effective storage class using fallback logic:
- Use explicit storage class if provided (non-empty)
- Fall back to runtime config's defaultStorageClassName if explicit is empty
- Empty string means use the cluster's default StorageClass
This implements consistent storage class resolution across all PVC creation paths.
Parameters:
- explicitStorageClass: Storage class explicitly specified in the resource spec
- runtimeConfigSpec: The resolved runtime configuration spec
Returns:
- The effective storage class name (may be empty to use cluster default)
func ResolveTemplateNameForService ¶
func ResolveTemplateNameForService( ctx context.Context, k8sClient client.Client, service *aimv1alpha1.AIMService, ) (TemplateResolution, TemplateSelectionStatus, error)
ResolveTemplateNameForService determines the template name to use for a service. It handles default template lookup, base template resolution, and derived template naming. Returns an empty BaseName/FinalName if no template can be resolved, which indicates the service should enter a degraded state.
func RuntimeConfigNameForService ¶
func RuntimeConfigNameForService(service *aimv1alpha1.AIMService, templateSpec aimv1alpha1.AIMServiceTemplateSpecCommon) string
RuntimeConfigNameForService determines the effective runtime config name for a service.
func SanitizeLabelValue ¶
SanitizeLabelValue converts a string to a valid Kubernetes label value. Valid label values must:
- Be empty or consist of alphanumeric characters, '-', '_' or '.'
- Start and end with an alphanumeric character
- Be at most 63 characters
Returns "unknown" if the sanitized value is empty.
func SelectBestTemplate ¶
func SelectBestTemplate( candidates []TemplateCandidate, overrides *aimv1alpha1.AIMServiceOverrides, availableGPUs []string, allowUnoptimized bool, ) (*TemplateCandidate, int, SelectionDiagnostics, []CandidateEvaluation)
SelectBestTemplate selects the best template candidate from the provided list. The heuristic is:
1. Consider only templates that are Available.
2. Filter by service overrides when provided.
3. Filter by GPUs that exist in the cluster.
4. Prefer namespace-scoped templates over cluster-scoped templates.
5. Prefer higher-tier GPUs, then latency over throughput, then lower precision.
Returns (selected template, count of templates with identical preference scores, diagnostics, per-candidate evaluations). If count > 1, the templates are ambiguous (identical in all preference dimensions).
func SetCondition ¶
SetCondition adds or updates a condition in the conditions list.
func TemplateNameFromSpec ¶
func TemplateNameFromSpec(service *aimv1alpha1.AIMService) string
TemplateNameFromSpec returns the template name from the service spec or status. Falls back to service name if no template reference is found.
func TemplateRequiresGPU ¶
func TemplateRequiresGPU(spec aimv1alpha1.AIMServiceTemplateSpecCommon) bool
TemplateRequiresGPU returns true if the template spec declares a GPU selector with a model.
func UpdateTemplateGPUAvailability ¶
func UpdateTemplateGPUAvailability( ctx context.Context, k8sClient client.Client, spec aimv1alpha1.AIMServiceTemplateSpecCommon, obs *TemplateObservation, ) error
UpdateTemplateGPUAvailability checks whether the GPU model declared by the template exists in the cluster. It updates the provided TemplateObservation with the result of the check. The GPU model is normalized to ensure consistent matching across different label formats.
Types ¶
type CandidateEvaluation ¶
type CandidateEvaluation struct {
Candidate TemplateCandidate
Status string // "chosen" or "rejected"
Reason string // CamelCase reason
Rank int // For candidates that passed all filters
}
CandidateEvaluation captures why a specific candidate was chosen or rejected.
type DiscoveryJobSpec ¶
type DiscoveryJobSpec struct {
TemplateName string
TemplateSpec aimv1alpha1.AIMServiceTemplateSpecCommon
Namespace string
ModelID string
Image string
Env []corev1.EnvVar
ImagePullSecrets []corev1.LocalObjectReference
ServiceAccount string
OwnerRef metav1.OwnerReference
}
DiscoveryJobSpec defines parameters for creating a discovery job.
type GPUResourceInfo ¶
type GPUResourceInfo struct {
// ResourceName is the full Kubernetes resource name (e.g., "amd.com/gpu").
ResourceName string
}
GPUResourceInfo contains GPU resource information for a specific GPU model.
type ImageLookupResult ¶
type ImageLookupResult struct {
Image string
Resources corev1.ResourceRequirements
}
ImageLookupResult captures the resolved image metadata from the catalog.
func LookupImageForClusterTemplate ¶
func LookupImageForClusterTemplate(ctx context.Context, k8sClient client.Client, modelName string) (*ImageLookupResult, error)
LookupImageForClusterTemplate looks up the container image for a cluster-scoped template. It searches only in AIMClusterModel resources. Returns ErrImageNotFound if no image is found in the catalog.
func LookupImageForNamespaceTemplate ¶
func LookupImageForNamespaceTemplate(ctx context.Context, k8sClient client.Client, namespace, modelName string) (*ImageLookupResult, error)
LookupImageForNamespaceTemplate looks up the container image for a namespace-scoped template. It searches AIMModel resources in the specified namespace first, then falls back to cluster-scoped AIMClusterModel resources. Returns ErrImageNotFound if no image is found in either location.
func (*ImageLookupResult) DeepCopy ¶
func (r *ImageLookupResult) DeepCopy() *ImageLookupResult
DeepCopy returns a deep copy of the ImageLookupResult.
type ImageObservation ¶
type ImageObservation struct {
// MetadataAlreadyAttempted is true if we've already attempted metadata extraction.
MetadataAlreadyAttempted bool
// MetadataExtracted is true if metadata was successfully extracted.
MetadataExtracted bool
// ImageMetadata contains the extracted metadata (if extraction succeeded).
ImageMetadata *aimv1alpha1.ImageMetadata
// RuntimeConfigResolution contains the resolved runtime config (for image pull secrets).
RuntimeConfigResolution *RuntimeConfigResolution
// ExistingTemplates are the ServiceTemplates currently owned by this image.
ExistingTemplates []client.Object
// DiscoveryEnabled reflects whether discovery is enabled from runtime config.
// Discovery is now always attempted unless disabled by runtime config.
DiscoveryEnabled bool
// MetadataError captures the latest metadata format issue encountered during extraction.
MetadataError *MetadataFormatError
// RegistryError captures categorized registry access errors (auth, not-found, etc.).
RegistryError *ImageRegistryError
// MetadataExtractionErr captures non-format extraction failures (e.g., registry or auth errors).
MetadataExtractionErr error
// TemplatesAutoGenerated tracks whether auto-generated templates were requested this cycle.
TemplatesAutoGenerated bool
}
ImageObservation holds the observed state for an AIMModel or AIMClusterModel.
func ObserveImage ¶
func ObserveImage(ctx context.Context, opts ImageObservationOptions) (*ImageObservation, error)
ObserveImage gathers the current state for an image resource.
type ImageObservationOptions ¶
type ImageObservationOptions struct {
// GetRuntimeConfig returns the runtime config for this scope (namespace or cluster).
GetRuntimeConfig func(ctx context.Context) (*RuntimeConfigResolution, error)
// ListOwnedTemplates returns templates owned by this image.
ListOwnedTemplates func(ctx context.Context) ([]client.Object, error)
// GetCurrentStatus returns the current status to check for existing conditions.
GetCurrentStatus func() *aimv1alpha1.AIMModelStatus
// GetImageSpec returns the image spec.
GetImageSpec func() aimv1alpha1.AIMModelSpec
}
ImageObservationOptions provides callbacks for observing image state.
type ImagePlanInput ¶
type ImagePlanInput struct {
// ImageName is the name of the image resource.
ImageName string
// Namespace is the namespace (empty for cluster-scoped).
Namespace string
// ImageSpec is the image specification.
ImageSpec aimv1alpha1.AIMModelSpec
// Observation is the observed state.
Observation *ImageObservation
// OwnerReference for created templates.
OwnerReference []metav1.OwnerReference
// Clientset for image inspection.
Clientset kubernetes.Interface
// IsClusterScoped indicates if this is a cluster-scoped image.
IsClusterScoped bool
// ParentObject is the AIMModel or AIMClusterModel for label propagation.
ParentObject client.Object
}
ImagePlanInput provides the input for planning image resources.
type ImagePullError ¶
type ImagePullError struct {
Type ImagePullErrorType
Container string
Reason string // e.g., "ImagePullBackOff", "ErrImagePull"
Message string // Full error message from Kubernetes
IsInitContainer bool
}
ImagePullError contains categorized information about an image pull failure.
func CheckInferenceServicePodImagePullStatus ¶
func CheckInferenceServicePodImagePullStatus(ctx context.Context, k8sClient client.Client, inferenceServiceName, namespace string) *ImagePullError
CheckInferenceServicePodImagePullStatus checks if an InferenceService's pods are stuck in ImagePullBackOff or ErrImagePull state. It looks for pods with the isvc.serving.kserve.io/inferenceservice label matching the InferenceService name. Returns the image pull error details if found, or nil otherwise.
type ImagePullErrorType ¶
type ImagePullErrorType string
ImagePullErrorType categorizes image pull errors.
const ( ImagePullErrorAuth ImagePullErrorType = "auth" ImagePullErrorNotFound ImagePullErrorType = "not-found" ImagePullErrorGeneric ImagePullErrorType = "generic" )
type ImageRegistryError ¶
type ImageRegistryError struct {
Type ImagePullErrorType // From template.go
Message string
Cause error
}
ImageRegistryError wraps registry access errors with categorization.
func (*ImageRegistryError) Error ¶
func (e *ImageRegistryError) Error() string
func (*ImageRegistryError) Unwrap ¶
func (e *ImageRegistryError) Unwrap() error
type MetadataFormatError ¶
MetadataFormatError indicates the image metadata is malformed and cannot be processed.
func (*MetadataFormatError) Error ¶
func (e *MetadataFormatError) Error() string
type ModelReference ¶
type ModelReference struct {
Name string
Scope TemplateScope
}
ModelReference represents a found model.
type ParsedDiscovery ¶
type ParsedDiscovery struct {
ModelSources []aimv1alpha1.AIMModelSource
Profile *aimv1alpha1.AIMProfile
}
ParsedDiscovery holds the parsed discovery result.
func ParseDiscoveryLogs ¶
func ParseDiscoveryLogs(ctx context.Context, k8sClient client.Client, clientset kubernetes.Interface, job *batchv1.Job) (*ParsedDiscovery, error)
ParseDiscoveryLogs parses the discovery job output to extract model sources and profile. Reads pod logs from the completed job and parses the JSON output.
type RuntimeConfigResolution ¶
type RuntimeConfigResolution struct {
// Name is the runtime config name requested by the consumer.
Name string
// Namespace is the consumer namespace used when searching for AIMRuntimeConfig.
Namespace string
ClusterConfig *aimv1alpha1.AIMClusterRuntimeConfig
NamespaceConfig *aimv1alpha1.AIMRuntimeConfig
ClusterConfigNotFound bool
NamespaceConfigNotFound bool
EffectiveSpec aimv1alpha1.AIMRuntimeConfigSpec
ResolvedRef *aimv1alpha1.AIMResolvedRuntimeConfig
}
RuntimeConfigResolution captures the resolved runtime configuration. When both namespace and cluster configs exist, they are merged with namespace config taking precedence.
func ResolveRuntimeConfig ¶
func ResolveRuntimeConfig(ctx context.Context, k8sClient client.Client, namespace, configName string) (*RuntimeConfigResolution, error)
ResolveRuntimeConfig resolves runtime config with field-level merging. When both cluster and namespace configs exist, cluster config is used as base and namespace config fields override/merge on top. When configName is empty, the default runtime config name is used.
type RuntimeObservation ¶
type RuntimeObservation[R client.Object] struct { Runtime R TemplateObservation }
RuntimeObservation combines TemplateObservation with a controller-specific runtime object.
func ObserveTemplate ¶
func ObserveTemplate[R client.Object](ctx context.Context, opts TemplateObservationOptions[R]) (*RuntimeObservation[R], error)
ObserveTemplate gathers runtime, discovery job, image, and runtime config information with common error handling.
type SelectionDiagnostics ¶
type SelectionDiagnostics struct {
TotalCandidates int
AfterAvailabilityFilter int
AfterUnoptimizedFilter int
AfterOverridesFilter int
AfterGPUAvailabilityFilter int
UnoptimizedTemplatesWereFiltered bool
}
SelectionDiagnostics provides detailed information about why template selection failed.
type ServiceObservation ¶
type ServiceObservation struct {
InferenceService *servingv1beta1.InferenceService
TemplateName string
BaseTemplateName string
Scope TemplateScope
AutoSelectedTemplate bool
TemplateAvailable bool
TemplateOwnedByService bool
ShouldCreateTemplate bool
RuntimeConfigSpec aimv1alpha1.AIMRuntimeConfigSpec
ResolvedRuntimeConfig *aimv1alpha1.AIMResolvedRuntimeConfig
ResolvedImage *aimv1alpha1.AIMResolvedReference
RoutePath string
RouteTimeout *string
PathTemplateErr error
RuntimeConfigErr error
ImageErr error
ModelResolutionErr error
TemplateStatus *aimv1alpha1.AIMServiceTemplateStatus
TemplateSpecCommon aimv1alpha1.AIMServiceTemplateSpecCommon
TemplateSpec *aimv1alpha1.AIMServiceTemplateSpec
TemplateNamespace string
ImageResources *corev1.ResourceRequirements
TemplateSelectionReason string
TemplateSelectionMessage string
TemplateSelectionCount int
TemplatesExistButNotReady bool // True when templates exist but aren't Available yet
ImageReady bool
ImageReadyReason string
ImageReadyMessage string
InferenceServicePodImageError *ImagePullError // Categorized image pull error from InferenceService pods
TemplateMatchingResults []aimv1alpha1.AIMTemplateCandidateResult
TemplateCache *aimv1alpha1.AIMTemplateCache
ModelCaches *aimv1alpha1.AIMModelCacheList
KVCache *aimv1alpha1.AIMKVCache // Observed AIMKVCache resource
KVCacheConfigMap *corev1.ConfigMap // ConfigMap with KV cache configuration
KVCacheErr error // Error from observing KV cache resources
}
ServiceObservation holds observed state for an AIMService reconciliation.
func (*ServiceObservation) RuntimeName ¶
func (o *ServiceObservation) RuntimeName() string
RuntimeName returns the effective runtime name for the service.
func (*ServiceObservation) TemplateFound ¶
func (o *ServiceObservation) TemplateFound() bool
TemplateFound returns true if a template was resolved (namespace or cluster scope).
type TemplateCandidate ¶
type TemplateCandidate struct {
Name string
Namespace string
Scope TemplateScope
Spec aimv1alpha1.AIMServiceTemplateSpecCommon
Status aimv1alpha1.AIMServiceTemplateStatus
}
TemplateCandidate captures the information needed to evaluate a template during selection.
func (TemplateCandidate) QualifiedName ¶
func (c TemplateCandidate) QualifiedName() string
QualifiedName returns a human-readable identifier for logging/debugging.
type TemplateObservation ¶
type TemplateObservation struct {
Job *batchv1.Job
Image string
ImageResources *corev1.ResourceRequirements
ImagePullSecrets []corev1.LocalObjectReference
ServiceAccountName string
RuntimeConfig *RuntimeConfigResolution
TemplateCaches *aimv1alpha1.AIMTemplateCacheList
GPUModel string
GPUAvailable bool
GPUChecked bool
JobPodImageError *ImagePullError // Categorized image pull error if job pod is stuck
}
TemplateObservation holds the common observed state for both template types.
type TemplateObservationOptions ¶
type TemplateObservationOptions[R client.Object] struct { K8sClient client.Client // Required for pod status checking GetRuntime func(ctx context.Context) (R, error) ShouldCheckDiscoveryJob bool GetDiscoveryJob func(ctx context.Context) (*batchv1.Job, error) GetJobNamespace func() string // Namespace where the job runs (for pod lookup) LookupImage func(ctx context.Context) (*ImageLookupResult, error) ResolveRuntimeConfig func(ctx context.Context) (*RuntimeConfigResolution, error) OnRuntimeConfigResolved func(resolution *RuntimeConfigResolution) GetImagePullSecrets func() []corev1.LocalObjectReference // Template's imagePullSecrets GetServiceAccountName func() string // Template's serviceAccountName GetTemplateCaches func(ctx context.Context) (*aimv1alpha1.AIMTemplateCacheList, error) }
TemplateObservationOptions configures ObserveTemplate behaviour.
type TemplatePlanBuilders ¶
type TemplatePlanBuilders struct {
BuildRuntime func(input TemplatePlanInput) client.Object
BuildDiscoveryJob func(input TemplatePlanInput) client.Object
}
TemplatePlanBuilders specifies how to render runtime and discovery job objects.
type TemplatePlanContext ¶
type TemplatePlanContext struct {
Ctx context.Context
Client client.Client
Template metav1.Object
APIVersion string
Kind string
Status aimv1alpha1.AIMTemplateStatusEnum
Observation *TemplateObservation
}
TemplatePlanContext provides metadata needed during plan generation.
type TemplatePlanInput ¶
type TemplatePlanInput struct {
Observation *TemplateObservation
RuntimeConfigSpec aimv1alpha1.AIMRuntimeConfigSpec
OwnerReference metav1.OwnerReference
}
TemplatePlanInput supplies builders with convenient access to observation data.
type TemplateResolution ¶
type TemplateResolution struct {
BaseName string
FinalName string
Derived bool
Scope TemplateScope
}
TemplateResolution captures the result of resolving a template name for a service.
type TemplateScope ¶
type TemplateScope string
TemplateScope indicates whether a template is namespace-scoped, cluster-scoped, or unresolved.
const ( TemplateScopeNone TemplateScope = "" TemplateScopeNamespace TemplateScope = "namespace" TemplateScopeCluster TemplateScope = "cluster" )
func LoadBaseTemplateSpec ¶
func LoadBaseTemplateSpec(ctx context.Context, k8sClient client.Client, service *aimv1alpha1.AIMService, baseName string) (*aimv1alpha1.AIMServiceTemplateSpec, TemplateScope, error)
LoadBaseTemplateSpec fetches the base template spec for a derived template. Searches namespace-scoped templates first, then falls back to cluster-scoped templates.
func ResolveOrCreateModelFromImage ¶
func ResolveOrCreateModelFromImage( ctx context.Context, k8sClient client.Client, serviceNamespace string, imageURI string, runtimeConfig *aimv1alpha1.AIMRuntimeConfigSpec, imagePullSecrets []corev1.LocalObjectReference, serviceAccountName string, parentService *aimv1alpha1.AIMService, ) (modelName string, scope TemplateScope, err error)
ResolveOrCreateModelFromImage searches for existing models matching the image URI, or creates a new one if none exists. Returns the model name and scope.
type TemplateSelectionStatus ¶
type TemplateSelectionStatus struct {
AutoSelected bool
CandidateCount int
SelectionReason string
SelectionMessage string
TemplatesExistButNotReady bool
ImageReady bool
ImageReadyReason string
ImageReadyMessage string
ModelResolutionErr error
TemplateMatchingResults []aimv1alpha1.AIMTemplateCandidateResult
}
TemplateSelectionStatus captures metadata about automatic template selection.
type TemplateSpec ¶
type TemplateSpec interface {
GetModelName() string
GetSpecModelSources() []aimv1alpha1.AIMModelSource
}
TemplateSpec provides the common template specification.
type TemplateWithStatus ¶
type TemplateWithStatus interface {
TemplateSpec
client.Object
GetStatus() *aimv1alpha1.AIMServiceTemplateStatus
}
TemplateWithStatus extends TemplateSpec with status access.