shared

package

Version: v0.2.0-rc9 (latest) · Published: Dec 17, 2025 · License: MIT · Imports: 50 · Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/silogen/kaiwo

Links

Open Source Insights

Documentation ¶

Index ¶

Constants
Variables
func ApplyHeadroomAndRound(baseSizeBytes int64, headroomPercent int32) int64
func BuildClusterServingRuntime(template aimstate.TemplateState, ownerRef metav1.OwnerReference) *servingv1alpha1.ClusterServingRuntime
func BuildDerivedTemplate(service *aimv1alpha1.AIMService, templateName string, resolvedModelName string, ...) *aimv1alpha1.AIMServiceTemplate
func BuildDiscoveryJob(spec DiscoveryJobSpec) *batchv1.Job
func BuildInferenceService(serviceState aimstate.ServiceState, ownerRef metav1.OwnerReference) *servingv1beta1.InferenceService
func BuildInferenceServiceHTTPRoute(serviceState aimstate.ServiceState, ownerRef metav1.OwnerReference) *gatewayapiv1.HTTPRoute
func BuildServingRuntime(template aimstate.TemplateState, ownerRef metav1.OwnerReference) *servingv1alpha1.ServingRuntime
func BuildServingRuntimeFromState(state aimstate.TemplateState, ownerRef metav1.OwnerReference) *servingv1alpha1.ServingRuntime
func BuildTemplateStateFromObservation(name, namespace string, specCommon aimv1alpha1.AIMServiceTemplateSpecCommon, ...) aimstate.TemplateState
func CountActiveDiscoveryJobs(ctx context.Context, k8sClient client.Client) (int, error)
func DefaultRoutePath(service *aimv1alpha1.AIMService) string
func DerivedTemplateName(baseName, suffix string) string
func EvaluateHTTPRouteStatus(route *gatewayapiv1.HTTPRoute) (bool, string, string)
func EvaluateInferenceServiceStatus(status *aimv1alpha1.AIMServiceStatus, obs *ServiceObservation, ...)
func EvaluateRoutingStatus(service *aimv1alpha1.AIMService, obs *ServiceObservation, ...) (enabled bool, ready bool, hasFatalError bool)
func FormatRuntimeConfigSources(resolution *RuntimeConfigResolution, namespaceLabel string) []string
func GenerateInferenceServiceName(serviceName, namespace string) string
func GetAMDDeviceIDsForModel(modelName string) []string
func GetClusterGPUResources(ctx context.Context, k8sClient client.Client) (map[string]GPUResourceInfo, error)
func GetClusterServingRuntime(ctx context.Context, k8sClient client.Client, name string) (*servingv1alpha1.ClusterServingRuntime, error)
func GetDiscoveryJob(ctx context.Context, k8sClient client.Client, namespace, templateName string) (*batchv1.Job, error)
func GetImageConfigLabels(ctx context.Context, imageURI string, keychain authn.Keychain) (map[string]string, error)
func GetOperatorNamespace() string
func GetPVCHeadroomPercent(spec aimv1alpha1.AIMRuntimeConfigSpec) int32
func GetServingRuntime(ctx context.Context, k8sClient client.Client, namespace, name string) (*servingv1alpha1.ServingRuntime, error)
func HandleImageMissing(status *aimv1alpha1.AIMServiceStatus, obs *ServiceObservation, ...) bool
func HandleImageNotReady(status *aimv1alpha1.AIMServiceStatus, obs *ServiceObservation, ...) bool
func HandleInferenceServicePodImageError(status *aimv1alpha1.AIMServiceStatus, obs *ServiceObservation, ...) bool
func HandleMissingModelSource(status *aimv1alpha1.AIMServiceStatus, obs *ServiceObservation, ...) bool
func HandleModelCacheReadiness(service *aimv1alpha1.AIMService, status *aimv1alpha1.AIMServiceStatus, ...) bool
func HandleModelResolutionFailure(status *aimv1alpha1.AIMServiceStatus, obs *ServiceObservation, ...) bool
func HandlePathTemplateError(status *aimv1alpha1.AIMServiceStatus, service *aimv1alpha1.AIMService, ...) bool
func HandleReconcileErrors(status *aimv1alpha1.AIMServiceStatus, ...) bool
func HandleRuntimeConfigMissing(status *aimv1alpha1.AIMServiceStatus, obs *ServiceObservation, ...) bool
func HandleTemplateDegraded(status *aimv1alpha1.AIMServiceStatus, obs *ServiceObservation, ...) bool
func HandleTemplateNotAvailable(status *aimv1alpha1.AIMServiceStatus, obs *ServiceObservation, ...) bool
func HandleTemplateSelectionFailure(status *aimv1alpha1.AIMServiceStatus, obs *ServiceObservation, ...) bool
func HasOwnerReference(refs []metav1.OwnerReference, uid types.UID) bool
func InferenceServiceRouteName(serviceName string) string
func InspectImage(ctx context.Context, imageURI string, ...) (*aimv1alpha1.ImageMetadata, error)
func IsDerivedTemplate(labels map[string]string) bool
func IsGPUAvailable(ctx context.Context, k8sClient client.Client, gpuModel string) (bool, error)
func IsJobComplete(job *batchv1.Job) bool
func IsJobFailed(job *batchv1.Job) bool
func IsJobSucceeded(job *batchv1.Job) bool
func JoinRuntimeConfigSources(resolution *RuntimeConfigResolution, namespaceLabel string) string
func ListAvailableGPUs(ctx context.Context, k8sClient client.Client) ([]string, error)
func NodeGPUChangePredicate() predicate.Predicate
func NormalizeRuntimeConfigName(name string) string
func ObserveDerivedTemplate(ctx context.Context, k8sClient client.Client, service *aimv1alpha1.AIMService, ...) error
func ObserveNonDerivedTemplate(ctx context.Context, k8sClient client.Client, service *aimv1alpha1.AIMService, ...) error
func OverridesSuffix(overrides *aimv1alpha1.AIMServiceOverrides) string
func PlanImageResources(ctx context.Context, input ImagePlanInput) ([]client.Object, *aimv1alpha1.ImageMetadata, error)
func PlanTemplateResources(ctx TemplatePlanContext, builders TemplatePlanBuilders) ([]client.Object, bool)
func PopulateObservationFromClusterTemplate(ctx context.Context, k8sClient client.Client, service *aimv1alpha1.AIMService, ...) error
func PopulateObservationFromNamespaceTemplate(ctx context.Context, k8sClient client.Client, service *aimv1alpha1.AIMService, ...) error
func ProjectImageStatus(status *aimv1alpha1.AIMModelStatus, spec aimv1alpha1.AIMModelSpec, ...)
func ProjectServiceStatus(service *aimv1alpha1.AIMService, obs *ServiceObservation, ...)
func ProjectTemplateStatus(ctx context.Context, k8sClient client.Client, clientset kubernetes.Interface, ...) error
func PropagateLabels(parent, child client.Object, config *aimv1alpha1.AIMRuntimeConfigCommon)
func QuantityWithHeadroom(baseSizeBytes int64, headroomPercent int32) resource.Quantity
func RequestsForServices(services []aimv1alpha1.AIMService) []reconcile.Request
func ResolveServiceRoutePath(service *aimv1alpha1.AIMService, ...) (string, error)
func ResolveServiceRouteTimeout(service *aimv1alpha1.AIMService, ...) *string
func ResolveStorageClass(explicitStorageClass string, ...) string
func ResolveTemplateNameForService(ctx context.Context, k8sClient client.Client, service *aimv1alpha1.AIMService) (TemplateResolution, TemplateSelectionStatus, error)
func RuntimeConfigNameForService(service *aimv1alpha1.AIMService, ...) string
func SanitizeLabelValue(s string) string
func SelectBestTemplate(candidates []TemplateCandidate, overrides *aimv1alpha1.AIMServiceOverrides, ...) (*TemplateCandidate, int, SelectionDiagnostics, []CandidateEvaluation)
func SetCondition(conditions *[]metav1.Condition, newCondition metav1.Condition)
func TemplateNameFromSpec(service *aimv1alpha1.AIMService) string
func TemplateRequiresGPU(spec aimv1alpha1.AIMServiceTemplateSpecCommon) bool
func UpdateTemplateGPUAvailability(ctx context.Context, k8sClient client.Client, ...) error
type CandidateEvaluation
type DiscoveryJobSpec
type GPUResourceInfo
type ImageLookupResult
- func LookupImageForClusterTemplate(ctx context.Context, k8sClient client.Client, modelName string) (*ImageLookupResult, error)
- func LookupImageForNamespaceTemplate(ctx context.Context, k8sClient client.Client, namespace, modelName string) (*ImageLookupResult, error)
- func (r *ImageLookupResult) DeepCopy() *ImageLookupResult
type ImageObservation
- func ObserveImage(ctx context.Context, opts ImageObservationOptions) (*ImageObservation, error)
type ImageObservationOptions
type ImagePlanInput
type ImagePullError
- func CheckInferenceServicePodImagePullStatus(ctx context.Context, k8sClient client.Client, ...) *ImagePullError
type ImagePullErrorType
type ImageRegistryError
- func (e *ImageRegistryError) Error() string
- func (e *ImageRegistryError) Unwrap() error
type MetadataFormatError
- func (e *MetadataFormatError) Error() string
type ModelReference
type ParsedDiscovery
- func ParseDiscoveryLogs(ctx context.Context, k8sClient client.Client, clientset kubernetes.Interface, ...) (*ParsedDiscovery, error)
type RuntimeConfigResolution
- func ResolveRuntimeConfig(ctx context.Context, k8sClient client.Client, namespace, configName string) (*RuntimeConfigResolution, error)
type RuntimeObservation
- func ObserveTemplate[R client.Object](ctx context.Context, opts TemplateObservationOptions[R]) (*RuntimeObservation[R], error)
type SelectionDiagnostics
type ServiceObservation
- func (o *ServiceObservation) RuntimeName() string
- func (o *ServiceObservation) TemplateFound() bool
type TemplateCandidate
- func (c TemplateCandidate) QualifiedName() string
type TemplateObservation
type TemplateObservationOptions
type TemplatePlanBuilders
type TemplatePlanContext
type TemplatePlanInput
type TemplateResolution
type TemplateScope
- func LoadBaseTemplateSpec(ctx context.Context, k8sClient client.Client, service *aimv1alpha1.AIMService, ...) (*aimv1alpha1.AIMServiceTemplateSpec, TemplateScope, error)
- func ResolveOrCreateModelFromImage(ctx context.Context, k8sClient client.Client, serviceNamespace string, ...) (modelName string, scope TemplateScope, err error)
type TemplateSelectionStatus
type TemplateSpec
type TemplateWithStatus

Constants ¶

View Source

// Package-wide AIM constants: runtime-config and PVC defaults, the discovery-job
// concurrency limit, and the AIM-specific label keys and values.
const (

	// DefaultRuntimeConfigName is the name of the default AIM runtime config
	DefaultRuntimeConfigName = "default"

	// MaxConcurrentDiscoveryJobs is the global limit for concurrent discovery jobs across all namespaces
	MaxConcurrentDiscoveryJobs = 10

	// DefaultPVCHeadroomPercent is the default percentage of extra space to add to PVCs
	// for model storage. This accounts for filesystem overhead and temporary files during
	// model loading. The value represents a percentage (e.g., 10 means 10% extra space).
	DefaultPVCHeadroomPercent = 10

	// AimLabelDomain is the base domain used for AIM-specific labels.
	AimLabelDomain = "aim.silogen.ai"

	// AIM label keys.
	// Every key below is AimLabelDomain followed by a "/"-prefixed suffix.
	LabelKeyTemplate        = AimLabelDomain + "/template"
	LabelKeyModelID         = AimLabelDomain + "/model-id"
	LabelKeyDerivedTemplate = AimLabelDomain + "/derived-template"
	LabelKeyAutoGenerated   = AimLabelDomain + "/auto-generated"
	LabelKeyImageName       = AimLabelDomain + "/aim-image"
	LabelKeyMetric          = AimLabelDomain + "/template.metric"
	LabelKeyPrecision       = AimLabelDomain + "/template.precision"
	LabelKeyServiceName     = AimLabelDomain + "/service-name"
	// NOTE(review): "/modelcache" is unhyphenated while the neighbouring
	// "/template-cache" and "/cache-type" are hyphenated. The string is a runtime
	// label key, so it must not be renamed without a migration.
	LabelKeyModelCache      = AimLabelDomain + "/modelcache"
	LabelKeyTemplateCache   = AimLabelDomain + "/template-cache"
	LabelKeyCacheType       = AimLabelDomain + "/cache-type"
	LabelKeySourceModel     = AimLabelDomain + "/source-model"

	// AIM label values.
	LabelValueRuntimeName        = "aim-runtime"
	LabelValueRuntimeComponent   = "serving-runtime"
	LabelValueManagedBy          = "aim-controller"
	LabelValueDiscoveryName      = "aim-discovery"
	LabelValueDiscoveryComponent = "discovery-job"
	LabelValueServiceName        = "aim-service"
	LabelValueServiceComponent   = "inference-service"
	// The two flags below are the string "true" because label values are strings.
	// NOTE(review): presumably paired with LabelKeyDerivedTemplate and
	// LabelKeyAutoGenerated respectively — confirm against the callers.
	LabelValueDerivedTemplate    = "true"
	LabelValueAutoGenerated      = "true"

	// Cache type label values.
	// NOTE(review): presumably the values set under LabelKeyCacheType — confirm.
	LabelValueCacheTypeTemplateCache = "template-cache"
	LabelValueCacheTypeTempService   = "temporary-service-cache"

	// NodeLabelAMDGPUDeviceID is the primary node label for AMD GPU device IDs (e.g., "74a1" for MI300X)
	NodeLabelAMDGPUDeviceID = "amd.com/gpu.device-id"
)

View Source

// Tuning constants for discovery jobs.
const (

	// DiscoveryJobBackoffLimit is the number of retries before marking the discovery job as failed
	DiscoveryJobBackoffLimit = 3

	// DiscoveryJobTTLSeconds defines how long completed discovery jobs persist
	// before automatic cleanup. This allows time for status inspection and log retrieval.
	// The value is in seconds (60 = one minute).
	// NOTE(review): one minute is a short window for manual log retrieval — confirm it is intentional.
	DiscoveryJobTTLSeconds = 60
)

View Source

// Defaults for GPU resource naming and shared memory, plus the Kubernetes
// label-value length limit.
const (
	// DefaultGPUResourceName is the default resource name for AMD GPUs in Kubernetes
	DefaultGPUResourceName = "amd.com/gpu"

	// DefaultSharedMemorySize is the default size allocated for /dev/shm in inference containers.
	// This is required for efficient inter-process communication in model serving workloads.
	// The value is a Kubernetes quantity string ("8Gi").
	DefaultSharedMemorySize = "8Gi"

	// KubernetesLabelValueMaxLength is the maximum length for a Kubernetes label value
	KubernetesLabelValueMaxLength = 63
)

View Source

// Labels related to automatically created models.
//
// NOTE(review): both keys spell out the "aim.silogen.ai" domain as a literal
// instead of building on AimLabelDomain like the other label keys in this
// package; consider unifying them.
const (
	// LabelAutoCreated marks models that were automatically created from service image references
	LabelAutoCreated    = "aim.silogen.ai/auto-created"
	// LabelKeyModelSource is the "aim.silogen.ai/model-source" label key.
	// NOTE(review): its exact meaning is not documented here; presumably it
	// records where an auto-created model came from — confirm against callers.
	LabelKeyModelSource = "aim.silogen.ai/model-source"
)

View Source

const (
	// MaxRoutePathLength is the maximum allowed length for a route path.
	// This prevents excessively long paths that could cause issues with gateways or proxies.
	// NOTE(review): the unit (bytes vs. characters) is not stated here — confirm
	// against the code that enforces the limit.
	MaxRoutePathLength = 200
)

Variables ¶

View Source

// ErrImageNotFound is a sentinel error created with errors.New, so callers can
// match it with errors.Is even after it has been wrapped.
var ErrImageNotFound = errors.New("image not found in catalog")

ErrImageNotFound is returned when an image is not found in the catalog.

View Source

var (

	// ErrMultipleModelsFound is returned when multiple models exist with the same image URI
	// It is a sentinel error created with errors.New, so callers can match it with errors.Is.
	ErrMultipleModelsFound = errors.New("multiple models found with the same image")
)

View Source

// ErrRuntimeConfigNotFound is a sentinel error created with errors.New, so
// callers can match it with errors.Is even after it has been wrapped.
var ErrRuntimeConfigNotFound = errors.New("runtime config not found")

ErrRuntimeConfigNotFound indicates that neither namespace nor cluster runtime config could be located.

View Source

// GPUPreferenceOrder lists GPU model names from most to least preferred.
//
// NOTE(review): the list is incomplete relative to KnownAmdGpuDevices, which
// also contains models such as MI100, MI300A, MI308X, MI350X and MI355X that do
// not appear here. How an unlisted model is ranked is not visible in this
// declaration — confirm against the selection code.
// NOTE(review): A100 is listed ahead of H100 — confirm that ordering is intended.
var GPUPreferenceOrder = []string{
	"MI325X",
	"MI300X",
	"MI250X",
	"MI210",
	"A100",
	"H100",
}

GPUPreferenceOrder defines the preference order for GPU models when selecting templates. GPUs earlier in the list are preferred over later ones. TODO: Fill in the complete preference order based on performance characteristics.

View Source

// KnownAmdGpuDevices maps AMD GPU device IDs (four-character lowercase hex
// strings, e.g. "74a1") to GPU model names (e.g. "MI300X"). Several device IDs
// can map to the same model name.
//
// NOTE(review): the keys presumably match the values of the
// NodeLabelAMDGPUDeviceID node label — confirm against the lookup code.
var KnownAmdGpuDevices = map[string]string{

	// MI-series (Instinct) devices.
	"738c": "MI100",
	"738e": "MI100",
	"7408": "MI250X",
	"740c": "MI250X",
	"740f": "MI210",
	"7410": "MI210",
	"74a0": "MI300A",
	"74a1": "MI300X",
	"74a2": "MI308X",
	"74a5": "MI325X",
	"74a8": "MI308X",
	"74a9": "MI300X",
	"74b5": "MI300X",
	"74b6": "MI308X",
	"74b9": "MI325X",
	"74bd": "MI300X",
	"75a0": "MI350X",
	"75a3": "MI355X",
	"75b0": "MI350X",
	"75b3": "MI355X",

	// V- and W-series devices.
	// NOTE(review): presumably Radeon Pro workstation/cloud parts — confirm.
	"7460": "V710",
	"7461": "V710",
	"7448": "W7900",
	"744a": "W7900",
	"7449": "W7800",
	"745e": "W7800",
	"73a2": "W6900X",
	"73a3": "W6800",
	"73ab": "W6800X",
	"73a1": "V620",
	"73ae": "V620",

	// RX-series devices.
	// NOTE(review): presumably Radeon RX consumer parts — confirm.
	"7550": "RX9070",
	"744c": "RX7900",
	"73af": "RX6900",
	"73bf": "RX6800",
}

View Source

// MetricPreferenceOrder lists optimization metrics from most to least
// preferred: "latency" first, then "throughput".
var MetricPreferenceOrder = []string{
	"latency",
	"throughput",
}

MetricPreferenceOrder defines preference for optimization metrics. "latency" is preferred over "throughput" by default.

View Source

// PrecisionPreferenceOrder lists precision levels from most to least
// preferred, starting with "fp8" and ending with "fp32".
//
// NOTE(review): "fp16" and "bf16" are both 16-bit formats, so placing fp16
// first is a tie-break choice rather than a "lower precision" ordering —
// confirm it is intended.
var PrecisionPreferenceOrder = []string{
	"fp8",
	"fp16",
	"bf16",
	"fp32",
}

PrecisionPreferenceOrder defines preference for precision levels. Lower precision (more optimized) is preferred.

View Source

// ProfileTypePreferenceOrder lists the AIM profile types, converted to
// strings, in the order optimized, preview, unoptimized.
//
// NOTE(review): by analogy with the other *PreferenceOrder variables in this
// package, earlier entries are presumably preferred over later ones — confirm
// against the template selection code.
var ProfileTypePreferenceOrder = []string{
	string(aimv1alpha1.AIMProfileTypeOptimized),
	string(aimv1alpha1.AIMProfileTypePreview),
	string(aimv1alpha1.AIMProfileTypeUnoptimized),
}

Functions ¶

func ApplyHeadroomAndRound ¶

func ApplyHeadroomAndRound(baseSizeBytes int64, headroomPercent int32) int64

ApplyHeadroomAndRound applies headroom percentage to a base size and rounds up to the nearest Gi. This ensures PVC sizes are clean, human-readable values (e.g., "421Gi" instead of "451936812032").

Parameters:

baseSizeBytes: The original size in bytes
headroomPercent: Percentage of extra space to add (0-100, e.g., 10 means 10% extra)

Returns:

The final size in bytes, rounded up to the nearest Gi boundary

Example:

Input: 9,094,593,249 bytes with 10% headroom
With headroom: 10,004,052,573 bytes (≈9.32 Gi)
Rounded: 10,737,418,240 bytes (10 Gi)

func BuildClusterServingRuntime ¶

func BuildClusterServingRuntime(template aimstate.TemplateState, ownerRef metav1.OwnerReference) *servingv1alpha1.ClusterServingRuntime

BuildClusterServingRuntime creates a KServe ClusterServingRuntime for a cluster-scoped template.

func BuildDerivedTemplate ¶

func BuildDerivedTemplate(
	service *aimv1alpha1.AIMService,
	templateName string,
	resolvedModelName string,
	baseSpec *aimv1alpha1.AIMServiceTemplateSpec,
) *aimv1alpha1.AIMServiceTemplate

BuildDerivedTemplate constructs an AIMServiceTemplate for a service with overrides. The template inherits from the base spec and applies service-specific customizations.

func BuildDiscoveryJob ¶

func BuildDiscoveryJob(spec DiscoveryJobSpec) *batchv1.Job

BuildDiscoveryJob creates a Job that runs a model discovery dry run.

func BuildInferenceService ¶

func BuildInferenceService(serviceState aimstate.ServiceState, ownerRef metav1.OwnerReference) *servingv1beta1.InferenceService

BuildInferenceService constructs a KServe InferenceService referencing a ServingRuntime or ClusterServingRuntime.

func BuildInferenceServiceHTTPRoute ¶

func BuildInferenceServiceHTTPRoute(serviceState aimstate.ServiceState, ownerRef metav1.OwnerReference) *gatewayapiv1.HTTPRoute

BuildInferenceServiceHTTPRoute creates an HTTPRoute that exposes the predictor service via the provided gateway parent.

func BuildServingRuntime ¶

func BuildServingRuntime(template aimstate.TemplateState, ownerRef metav1.OwnerReference) *servingv1alpha1.ServingRuntime

BuildServingRuntime creates a KServe ServingRuntime for a namespace-scoped template.

func BuildServingRuntimeFromState ¶

func BuildServingRuntimeFromState(state aimstate.TemplateState, ownerRef metav1.OwnerReference) *servingv1alpha1.ServingRuntime

BuildServingRuntimeFromState constructs a namespaced ServingRuntime from a TemplateState snapshot. This is an adapter function that maintains compatibility with the original signature.

func BuildTemplateStateFromObservation ¶

func BuildTemplateStateFromObservation(
	name, namespace string,
	specCommon aimv1alpha1.AIMServiceTemplateSpecCommon,
	observation *TemplateObservation,
	runtimeConfigSpec aimv1alpha1.AIMRuntimeConfigSpec,
	status *aimv1alpha1.AIMServiceTemplateStatus,
) aimstate.TemplateState

BuildTemplateStateFromObservation constructs a TemplateState from the template specification, observation, and status. This is an adapter function that combines template metadata with observed resources.

func CountActiveDiscoveryJobs ¶

func CountActiveDiscoveryJobs(ctx context.Context, k8sClient client.Client) (int, error)

CountActiveDiscoveryJobs counts the number of active (non-complete) discovery jobs across all namespaces. A job is considered active if it exists and is not in a complete state (succeeded or failed).

func DefaultRoutePath ¶

func DefaultRoutePath(service *aimv1alpha1.AIMService) string

DefaultRoutePath returns the default HTTP route prefix.

func DerivedTemplateName ¶

func DerivedTemplateName(baseName, suffix string) string

DerivedTemplateName constructs a template name from a base name and suffix. Ensures the final name does not exceed Kubernetes name length limits.

func EvaluateHTTPRouteStatus ¶

func EvaluateHTTPRouteStatus(route *gatewayapiv1.HTTPRoute) (bool, string, string)

EvaluateHTTPRouteStatus checks the HTTPRoute status and returns readiness state.

func EvaluateInferenceServiceStatus ¶

func EvaluateInferenceServiceStatus(
	status *aimv1alpha1.AIMServiceStatus,
	obs *ServiceObservation,
	inferenceService *servingv1beta1.InferenceService,
	httpRoute *gatewayapiv1.HTTPRoute,
	routingEnabled bool,
	routingReady bool,
	setCondition func(conditionType string, conditionStatus metav1.ConditionStatus, reason, message string),
)

EvaluateInferenceServiceStatus checks InferenceService and routing readiness. Updates status conditions based on the InferenceService and routing state.

func EvaluateRoutingStatus ¶

func EvaluateRoutingStatus(
	service *aimv1alpha1.AIMService,
	obs *ServiceObservation,
	status *aimv1alpha1.AIMServiceStatus,
	setCondition func(conditionType string, conditionStatus metav1.ConditionStatus, reason, message string),
) (enabled bool, ready bool, hasFatalError bool)

EvaluateRoutingStatus checks routing configuration and updates status accordingly. Returns (enabled, ready, hasFatalError) to indicate if routing is enabled, if it's ready, and if there's a terminal error.

func FormatRuntimeConfigSources ¶

func FormatRuntimeConfigSources(resolution *RuntimeConfigResolution, namespaceLabel string) []string

FormatRuntimeConfigSources renders a human-readable list of runtime config sources for logging/events.

func GenerateInferenceServiceName ¶

func GenerateInferenceServiceName(serviceName, namespace string) string

GenerateInferenceServiceName creates a KServe InferenceService name that fits DNS label constraints. KServe creates hostnames in the format {isvc-name}-predictor-{namespace}; these hostnames must be ≤ 63 characters to comply with DNS label limits.

If the original name would exceed the limit, this function:

1. Truncates the base name.
2. Appends an 8-character hash of the full original name.
3. Ensures the result is RFC1123 compliant.

The hash ensures uniqueness while keeping names deterministic and short.

func GetAMDDeviceIDsForModel ¶

func GetAMDDeviceIDsForModel(modelName string) []string

GetAMDDeviceIDsForModel returns all AMD device IDs that map to a given GPU model name. This is the inverse of mapAMDDeviceIDToModel, allowing lookup of all device IDs for a model. Example: GetAMDDeviceIDsForModel("MI300X") returns ["74a1", "74a9", "74b5", "74bd"]. It returns an empty slice if the model is not found or is not an AMD GPU.

func GetClusterGPUResources ¶

func GetClusterGPUResources(ctx context.Context, k8sClient client.Client) (map[string]GPUResourceInfo, error)

GetClusterGPUResources returns an aggregated view of all GPU resources in the cluster. It scans all nodes and aggregates resources that start with "amd.com/" or "nvidia.com/". Returns a map where keys are GPU models (e.g., "MI300X", "A100") extracted from node labels, and values contain the resource name.

func GetClusterServingRuntime ¶

func GetClusterServingRuntime(ctx context.Context, k8sClient client.Client, name string) (*servingv1alpha1.ClusterServingRuntime, error)

GetClusterServingRuntime fetches a ClusterServingRuntime by name

func GetDiscoveryJob ¶

func GetDiscoveryJob(ctx context.Context, k8sClient client.Client, namespace, templateName string) (*batchv1.Job, error)

GetDiscoveryJob fetches the discovery job for a template. Returns the newest job (by CreationTimestamp) if multiple exist.

func GetImageConfigLabels ¶

func GetImageConfigLabels(ctx context.Context, imageURI string, keychain authn.Keychain) (map[string]string, error)

GetImageConfigLabels is a helper function that retrieves just the labels from an image without parsing them into structured metadata. Useful for debugging.

func GetOperatorNamespace ¶

func GetOperatorNamespace() string

GetOperatorNamespace returns the namespace where the AIM operator runs. It reads the AIM_OPERATOR_NAMESPACE environment variable; if unset, it defaults to "kaiwo-system".

func GetPVCHeadroomPercent ¶

func GetPVCHeadroomPercent(spec aimv1alpha1.AIMRuntimeConfigSpec) int32

GetPVCHeadroomPercent returns the PVC headroom percentage from the runtime config spec. If not set, returns the default value defined in DefaultPVCHeadroomPercent.

func GetServingRuntime ¶

func GetServingRuntime(ctx context.Context, k8sClient client.Client, namespace, name string) (*servingv1alpha1.ServingRuntime, error)

GetServingRuntime fetches a ServingRuntime by namespace and name

func HandleImageMissing ¶

func HandleImageMissing(
	status *aimv1alpha1.AIMServiceStatus,
	obs *ServiceObservation,
	setCondition func(conditionType string, conditionStatus metav1.ConditionStatus, reason, message string),
) bool

HandleImageMissing checks for missing image and updates status. Returns true if the image is missing.

func HandleImageNotReady ¶

func HandleImageNotReady(
	status *aimv1alpha1.AIMServiceStatus,
	obs *ServiceObservation,
	setCondition func(conditionType string, conditionStatus metav1.ConditionStatus, reason, message string),
) bool

HandleImageNotReady checks if the resolved image is not yet ready and updates status. Returns true if the service should wait for the image to become ready.

func HandleInferenceServicePodImageError ¶

func HandleInferenceServicePodImageError(
	status *aimv1alpha1.AIMServiceStatus,
	obs *ServiceObservation,
	setCondition func(conditionType string, conditionStatus metav1.ConditionStatus, reason, message string),
) bool

HandleInferenceServicePodImageError checks for image pull errors in InferenceService pods. Returns true if an image pull error was detected.

func HandleMissingModelSource ¶

func HandleMissingModelSource(
	status *aimv1alpha1.AIMServiceStatus,
	obs *ServiceObservation,
	setCondition func(conditionType string, conditionStatus metav1.ConditionStatus, reason, message string),
) bool

HandleMissingModelSource checks if the template is available but has no model sources. Returns true if model sources are missing (discovery succeeded but produced no usable sources).

func HandleModelCacheReadiness ¶

func HandleModelCacheReadiness(service *aimv1alpha1.AIMService, status *aimv1alpha1.AIMServiceStatus, obs *ServiceObservation, setCondition func(conditionType string, conditionStatus metav1.ConditionStatus, reason, message string)) bool

func HandleModelResolutionFailure ¶

func HandleModelResolutionFailure(
	status *aimv1alpha1.AIMServiceStatus,
	obs *ServiceObservation,
	setCondition func(conditionType string, conditionStatus metav1.ConditionStatus, reason, message string),
) bool

HandleModelResolutionFailure checks for model resolution failures and updates status. Returns true if model resolution failed.

func HandlePathTemplateError ¶

func HandlePathTemplateError(
	status *aimv1alpha1.AIMServiceStatus,
	service *aimv1alpha1.AIMService,
	obs *ServiceObservation,
	setCondition func(conditionType string, conditionStatus metav1.ConditionStatus, reason, message string),
) bool

HandlePathTemplateError checks for path template errors and updates status. Returns true if there is a path template error. This can occur when routing is enabled (via service spec or runtime config) but the path template is invalid.

func HandleReconcileErrors ¶

func HandleReconcileErrors(
	status *aimv1alpha1.AIMServiceStatus,
	setCondition func(conditionType string, conditionStatus metav1.ConditionStatus, reason, message string),
	errs controllerutils.ReconcileErrors,
) bool

HandleReconcileErrors processes reconciliation errors and updates service status. Returns true if errors were found and handled.

func HandleRuntimeConfigMissing ¶

func HandleRuntimeConfigMissing(
	status *aimv1alpha1.AIMServiceStatus,
	obs *ServiceObservation,
	setCondition func(conditionType string, conditionStatus metav1.ConditionStatus, reason, message string),
) bool

HandleRuntimeConfigMissing checks for missing runtime config and updates status. Returns true if the runtime config is missing.

func HandleTemplateDegraded ¶

func HandleTemplateDegraded(
	status *aimv1alpha1.AIMServiceStatus,
	obs *ServiceObservation,
	setCondition func(conditionType string, conditionStatus metav1.ConditionStatus, reason, message string),
) bool

HandleTemplateDegraded checks if the template is degraded, not available, or failed and updates status. Returns true if the template is degraded, not available, or failed.

func HandleTemplateNotAvailable ¶

func HandleTemplateNotAvailable(
	status *aimv1alpha1.AIMServiceStatus,
	obs *ServiceObservation,
	setCondition func(conditionType string, conditionStatus metav1.ConditionStatus, reason, message string),
) bool

HandleTemplateNotAvailable checks if the template is not available and updates status. Returns true if the template is not yet available (Pending or Progressing). Sets the service to Pending state because it's waiting for a dependency (the template).

func HandleTemplateSelectionFailure ¶

func HandleTemplateSelectionFailure(
	status *aimv1alpha1.AIMServiceStatus,
	obs *ServiceObservation,
	setCondition func(conditionType string, conditionStatus metav1.ConditionStatus, reason, message string),
) bool

HandleTemplateSelectionFailure reports failures during automatic template selection.

func HasOwnerReference ¶

func HasOwnerReference(refs []metav1.OwnerReference, uid types.UID) bool

HasOwnerReference checks if the given UID exists in the owner references list.

func InferenceServiceRouteName ¶

func InferenceServiceRouteName(serviceName string) string

InferenceServiceRouteName returns the canonical HTTPRoute name for an InferenceService.

func InspectImage ¶

func InspectImage(
	ctx context.Context,
	imageURI string,
	imagePullSecrets []corev1.LocalObjectReference,
	clientset kubernetes.Interface,
	namespace string,
) (*aimv1alpha1.ImageMetadata, error)

InspectImage extracts metadata from a container image using the provided image pull secrets. It uses go-containerregistry to authenticate and fetch image labels, then parses them into the ImageMetadata structure.

Parameters:

ctx: Context for the operation
imageURI: Full container image reference (e.g., "registry.example.com/repo/image:tag")
imagePullSecrets: Kubernetes image pull secrets for authentication
clientset: Kubernetes clientset for accessing secrets
namespace: Namespace where the secrets are located

Returns:

*ImageMetadata: Extracted metadata if successful
error: Any error encountered during inspection (authentication, network, parsing, etc.). Registry access errors are wrapped in ImageRegistryError for categorization.

func IsDerivedTemplate ¶

func IsDerivedTemplate(labels map[string]string) bool

IsDerivedTemplate returns true when the provided labels indicate a controller-managed derived template.

func IsGPUAvailable ¶

func IsGPUAvailable(ctx context.Context, k8sClient client.Client, gpuModel string) (bool, error)

IsGPUAvailable checks if a specific GPU model is available in the cluster. The gpuModel parameter should be the GPU model name (e.g., "MI300X", "A100"), not the resource name. The input is normalized to handle variants like "MI300X (rev 2)" or "Instinct MI300X".

func IsJobComplete ¶

func IsJobComplete(job *batchv1.Job) bool

IsJobComplete returns true if the job has finished, whether it succeeded or failed.

func IsJobFailed ¶

func IsJobFailed(job *batchv1.Job) bool

IsJobFailed returns true if the job failed

func IsJobSucceeded ¶

func IsJobSucceeded(job *batchv1.Job) bool

IsJobSucceeded returns true if the job completed successfully

func JoinRuntimeConfigSources ¶

func JoinRuntimeConfigSources(resolution *RuntimeConfigResolution, namespaceLabel string) string

JoinRuntimeConfigSources joins runtime config sources for concise logging.

func ListAvailableGPUs ¶

func ListAvailableGPUs(ctx context.Context, k8sClient client.Client) ([]string, error)

ListAvailableGPUs returns a list of all GPU resource types available in the cluster.

func NodeGPUChangePredicate ¶

func NodeGPUChangePredicate() predicate.Predicate

NodeGPUChangePredicate returns a predicate that triggers reconciles when GPU-related node attributes change.

func NormalizeRuntimeConfigName ¶

func NormalizeRuntimeConfigName(name string) string

NormalizeRuntimeConfigName returns the effective name to use for lookups when the user omits the field.

func ObserveDerivedTemplate ¶

func ObserveDerivedTemplate(
	ctx context.Context,
	k8sClient client.Client,
	service *aimv1alpha1.AIMService,
	resolution TemplateResolution,
	obs *ServiceObservation,
) error

ObserveDerivedTemplate handles observation for services with derived templates. It fetches the derived template if it exists, or loads the base template spec for creation.

func ObserveNonDerivedTemplate ¶

func ObserveNonDerivedTemplate(
	ctx context.Context,
	k8sClient client.Client,
	service *aimv1alpha1.AIMService,
	templateName string,
	preferredScope TemplateScope,
	obs *ServiceObservation,
) error

ObserveNonDerivedTemplate handles observation for services with non-derived templates. It searches for namespace-scoped templates first, then falls back to cluster-scoped templates. It does not set ShouldCreateTemplate; that decision is made in the controller based on whether an explicit templateRef was provided.

func OverridesSuffix ¶

func OverridesSuffix(overrides *aimv1alpha1.AIMServiceOverrides) string

OverridesSuffix computes a hash suffix for service overrides.

func PlanImageResources ¶

func PlanImageResources(ctx context.Context, input ImagePlanInput) ([]client.Object, *aimv1alpha1.ImageMetadata, error)

PlanImageResources plans the desired state for an image resource. It performs metadata extraction if needed and creates ServiceTemplates based on recommendedDeployments.

func PlanTemplateResources ¶

func PlanTemplateResources(ctx TemplatePlanContext, builders TemplatePlanBuilders) ([]client.Object, bool)

PlanTemplateResources produces desired objects based on the observation and controller-provided builders. It respects the global limit on concurrent discovery jobs (MaxConcurrentDiscoveryJobs). Returns the desired objects and a boolean indicating if a requeue is needed (when job limit is reached).

func PopulateObservationFromClusterTemplate ¶

func PopulateObservationFromClusterTemplate(
	ctx context.Context,
	k8sClient client.Client,
	service *aimv1alpha1.AIMService,
	template *aimv1alpha1.AIMClusterServiceTemplate,
	obs *ServiceObservation,
) error

PopulateObservationFromClusterTemplate extracts data from a cluster-scoped template into the observation.

func PopulateObservationFromNamespaceTemplate ¶

func PopulateObservationFromNamespaceTemplate(
	ctx context.Context,
	k8sClient client.Client,
	service *aimv1alpha1.AIMService,
	template *aimv1alpha1.AIMServiceTemplate,
	obs *ServiceObservation,
) error

PopulateObservationFromNamespaceTemplate extracts data from a namespace-scoped template into the observation.

func ProjectImageStatus ¶

func ProjectImageStatus(
	status *aimv1alpha1.AIMModelStatus,
	spec aimv1alpha1.AIMModelSpec,
	observation *ImageObservation,
	extractedMetadata *aimv1alpha1.ImageMetadata,
	extractionErr error,
	observedGeneration int64,
)

ProjectImageStatus updates the status of an image resource based on observation and errors.

func ProjectServiceStatus ¶

func ProjectServiceStatus(
	service *aimv1alpha1.AIMService,
	obs *ServiceObservation,
	inferenceService *servingv1beta1.InferenceService,
	httpRoute *gatewayapiv1.HTTPRoute,
	errs controllerutils.ReconcileErrors,
)

ProjectServiceStatus computes and updates the service status based on observations and errors. This is a high-level orchestrator that calls the individual status handler functions.

func ProjectTemplateStatus ¶

func ProjectTemplateStatus(
	ctx context.Context,
	k8sClient client.Client,
	clientset kubernetes.Interface,
	recorder record.EventRecorder,
	template TemplateWithStatus,
	obs *TemplateObservation,
	errs controllerutils.ReconcileErrors,
	imageNotFoundMessage string,
) error

ProjectTemplateStatus computes status from observation and errors. This is shared between cluster and namespace-scoped template controllers. It modifies the status of the given template directly and emits events for discovery phase changes.

func PropagateLabels ¶

func PropagateLabels(parent, child client.Object, config *aimv1alpha1.AIMRuntimeConfigCommon)

PropagateLabels propagates labels from a parent resource to a child resource based on the runtime config's label propagation settings. Only labels whose keys match the patterns defined in the config are copied. The child's existing labels are preserved and only new labels are added.

Parameters:

parent: The source resource whose labels should be propagated
child: The target resource that will receive the propagated labels
config: The runtime config common spec containing label propagation settings

The function does nothing if:

Label propagation is not enabled in the config
The config is nil or has no label propagation settings
The parent has no labels

Special handling for Jobs: Labels are also propagated to the PodTemplateSpec.

func QuantityWithHeadroom ¶

func QuantityWithHeadroom(baseSizeBytes int64, headroomPercent int32) resource.Quantity

QuantityWithHeadroom creates a resource.Quantity with headroom applied and rounded to the nearest Gi. This is a convenience wrapper around ApplyHeadroomAndRound that returns a Kubernetes Quantity.

The returned Quantity uses BinarySI format (Ki, Mi, Gi, Ti suffixes) for compatibility with Kubernetes storage resources.

Parameters:

baseSizeBytes: The original size in bytes
headroomPercent: Percentage of extra space to add (0-100)

Returns:

A resource.Quantity representing the size with headroom, formatted cleanly

func RequestsForServices ¶

func RequestsForServices(services []aimv1alpha1.AIMService) []reconcile.Request

RequestsForServices converts a list of AIMServices to reconcile requests.

func ResolveServiceRoutePath ¶

func ResolveServiceRoutePath(service *aimv1alpha1.AIMService, runtimeConfig aimv1alpha1.AIMRuntimeConfigSpec) (string, error)

ResolveServiceRoutePath renders the HTTP route prefix using service and runtime config context. The precedence order is: (1) Service.Spec.Routing.PathTemplate (highest priority); (2) RuntimeConfig.Routing.PathTemplate (base layer).

func ResolveServiceRouteTimeout ¶

func ResolveServiceRouteTimeout(service *aimv1alpha1.AIMService, runtimeConfig aimv1alpha1.AIMRuntimeConfigSpec) *string

ResolveServiceRouteTimeout resolves the HTTP route timeout using service and runtime config context. The precedence order is: (1) Service.Spec.Routing.RequestTimeout (highest priority); (2) RuntimeConfig.Routing.RequestTimeout (base layer). Returns nil if no timeout is configured at any level.

func ResolveStorageClass ¶

func ResolveStorageClass(explicitStorageClass string, runtimeConfigSpec aimv1alpha1.AIMRuntimeConfigSpec) string

ResolveStorageClass determines the effective storage class using fallback logic:

1. Use the explicit storage class if provided (non-empty).
2. Fall back to the runtime config's defaultStorageClassName if the explicit value is empty.
3. If both are empty, the result is an empty string, which means use the cluster's default StorageClass.

This implements consistent storage class resolution across all PVC creation paths.

Parameters:

explicitStorageClass: Storage class explicitly specified in the resource spec
runtimeConfigSpec: The resolved runtime configuration spec

Returns:

The effective storage class name (may be empty to use cluster default)

func ResolveTemplateNameForService ¶

func ResolveTemplateNameForService(
	ctx context.Context,
	k8sClient client.Client,
	service *aimv1alpha1.AIMService,
) (TemplateResolution, TemplateSelectionStatus, error)

ResolveTemplateNameForService determines the template name to use for a service. It handles default template lookup, base template resolution, and derived template naming. Returns an empty BaseName/FinalName if no template can be resolved, which indicates the service should enter a degraded state.

func RuntimeConfigNameForService ¶

func RuntimeConfigNameForService(service *aimv1alpha1.AIMService, templateSpec aimv1alpha1.AIMServiceTemplateSpecCommon) string

RuntimeConfigNameForService determines the effective runtime config name for a service.

func SanitizeLabelValue ¶

func SanitizeLabelValue(s string) string

SanitizeLabelValue converts a string to a valid Kubernetes label value. Valid label values must: (a) be empty or consist of alphanumeric characters, '-', '_' or '.'; (b) start and end with an alphanumeric character; (c) be at most 63 characters. Returns "unknown" if the sanitized value is empty.

func SelectBestTemplate ¶

func SelectBestTemplate(
	candidates []TemplateCandidate,
	overrides *aimv1alpha1.AIMServiceOverrides,
	availableGPUs []string,
	allowUnoptimized bool,
) (*TemplateCandidate, int, SelectionDiagnostics, []CandidateEvaluation)

SelectBestTemplate selects the best template candidate from the provided list. The heuristic is: (1) consider only templates that are Available; (2) filter by service overrides when provided; (3) filter by GPUs that exist in the cluster; (4) prefer namespace-scoped templates over cluster-scoped templates; (5) prefer higher-tier GPUs, then latency over throughput, then lower precision. Returns (selected template, count of templates with identical preference scores, diagnostics, per-candidate evaluations). If count > 1, the templates are ambiguous (identical in all preference dimensions).

func SetCondition ¶

func SetCondition(conditions *[]metav1.Condition, newCondition metav1.Condition)

SetCondition adds or updates a condition in the conditions list.

func TemplateNameFromSpec ¶

func TemplateNameFromSpec(service *aimv1alpha1.AIMService) string

TemplateNameFromSpec returns the template name from the service spec or status. Falls back to service name if no template reference is found.

func TemplateRequiresGPU ¶

func TemplateRequiresGPU(spec aimv1alpha1.AIMServiceTemplateSpecCommon) bool

TemplateRequiresGPU returns true if the template spec declares a GPU selector with a model.

func UpdateTemplateGPUAvailability ¶

func UpdateTemplateGPUAvailability(
	ctx context.Context,
	k8sClient client.Client,
	spec aimv1alpha1.AIMServiceTemplateSpecCommon,
	obs *TemplateObservation,
) error

UpdateTemplateGPUAvailability checks whether the GPU model declared by the template exists in the cluster. It updates the provided TemplateObservation with the result of the check. The GPU model is normalized to ensure consistent matching across different label formats.

Types ¶

type CandidateEvaluation ¶

type CandidateEvaluation struct {
	Candidate TemplateCandidate
	Status    string // "chosen" or "rejected"
	Reason    string // CamelCase reason
	Rank      int    // For candidates that passed all filters
}

CandidateEvaluation captures why a specific candidate was chosen or rejected.

type DiscoveryJobSpec ¶

type DiscoveryJobSpec struct {
	TemplateName     string
	TemplateSpec     aimv1alpha1.AIMServiceTemplateSpecCommon
	Namespace        string
	ModelID          string
	Image            string
	Env              []corev1.EnvVar
	ImagePullSecrets []corev1.LocalObjectReference
	ServiceAccount   string
	OwnerRef         metav1.OwnerReference
}

DiscoveryJobSpec defines parameters for creating a discovery job

type GPUResourceInfo ¶

type GPUResourceInfo struct {
	// ResourceName is the full Kubernetes resource name (e.g., "amd.com/gpu").
	ResourceName string
}

GPUResourceInfo contains GPU resource information for a specific GPU model.

type ImageLookupResult ¶

type ImageLookupResult struct {
	Image     string
	Resources corev1.ResourceRequirements
}

ImageLookupResult captures the resolved image metadata from the catalog.

func LookupImageForClusterTemplate ¶

func LookupImageForClusterTemplate(ctx context.Context, k8sClient client.Client, modelName string) (*ImageLookupResult, error)

LookupImageForClusterTemplate looks up the container image for a cluster-scoped template. It searches only in AIMClusterModel resources. Returns ErrImageNotFound if no image is found in the catalog.

func LookupImageForNamespaceTemplate ¶

func LookupImageForNamespaceTemplate(ctx context.Context, k8sClient client.Client, namespace, modelName string) (*ImageLookupResult, error)

LookupImageForNamespaceTemplate looks up the container image for a namespace-scoped template. It searches AIMModel resources in the specified namespace first, then falls back to cluster-scoped AIMClusterModel resources. Returns ErrImageNotFound if no image is found in either location.

func (*ImageLookupResult) DeepCopy ¶

func (r *ImageLookupResult) DeepCopy() *ImageLookupResult

DeepCopy returns a deep copy of the ImageLookupResult.

type ImageObservation ¶

type ImageObservation struct {
	// MetadataAlreadyAttempted is true if we've already attempted metadata extraction.
	MetadataAlreadyAttempted bool

	// MetadataExtracted is true if metadata was successfully extracted.
	MetadataExtracted bool

	// ImageMetadata contains the extracted metadata (if extraction succeeded).
	ImageMetadata *aimv1alpha1.ImageMetadata

	// RuntimeConfigResolution contains the resolved runtime config (for image pull secrets).
	RuntimeConfigResolution *RuntimeConfigResolution

	// ExistingTemplates are the ServiceTemplates currently owned by this image.
	ExistingTemplates []client.Object

	// DiscoveryEnabled reflects whether discovery is enabled from runtime config.
	// Discovery is now always attempted unless disabled by runtime config.
	DiscoveryEnabled bool

	// MetadataError captures the latest metadata format issue encountered during extraction.
	MetadataError *MetadataFormatError

	// RegistryError captures categorized registry access errors (auth, not-found, etc.).
	RegistryError *ImageRegistryError

	// MetadataExtractionErr captures non-format extraction failures (e.g., registry or auth errors).
	MetadataExtractionErr error

	// TemplatesAutoGenerated tracks whether auto-generated templates were requested this cycle.
	TemplatesAutoGenerated bool
}

ImageObservation holds the observed state for an AIMModel or AIMClusterModel.

func ObserveImage ¶

func ObserveImage(ctx context.Context, opts ImageObservationOptions) (*ImageObservation, error)

ObserveImage gathers the current state for an image resource.

type ImageObservationOptions ¶

type ImageObservationOptions struct {
	// GetRuntimeConfig returns the runtime config for this scope (namespace or cluster).
	GetRuntimeConfig func(ctx context.Context) (*RuntimeConfigResolution, error)

	// ListOwnedTemplates returns templates owned by this image.
	ListOwnedTemplates func(ctx context.Context) ([]client.Object, error)

	// GetCurrentStatus returns the current status to check for existing conditions.
	GetCurrentStatus func() *aimv1alpha1.AIMModelStatus

	// GetImageSpec returns the image spec.
	GetImageSpec func() aimv1alpha1.AIMModelSpec
}

ImageObservationOptions provides callbacks for observing image state.

type ImagePlanInput ¶

type ImagePlanInput struct {
	// ImageName is the name of the image resource.
	ImageName string

	// Namespace is the namespace (empty for cluster-scoped).
	Namespace string

	// ImageSpec is the image specification.
	ImageSpec aimv1alpha1.AIMModelSpec

	// Observation is the observed state.
	Observation *ImageObservation

	// OwnerReference for created templates.
	OwnerReference []metav1.OwnerReference

	// Clientset for image inspection.
	Clientset kubernetes.Interface

	// IsClusterScoped indicates if this is a cluster-scoped image.
	IsClusterScoped bool

	// ParentObject is the AIMModel or AIMClusterModel for label propagation.
	ParentObject client.Object
}

ImagePlanInput provides the input for planning image resources.

type ImagePullError ¶

type ImagePullError struct {
	Type            ImagePullErrorType
	Container       string
	Reason          string // e.g., "ImagePullBackOff", "ErrImagePull"
	Message         string // Full error message from Kubernetes
	IsInitContainer bool
}

ImagePullError contains categorized information about an image pull failure

func CheckInferenceServicePodImagePullStatus ¶

func CheckInferenceServicePodImagePullStatus(ctx context.Context, k8sClient client.Client, inferenceServiceName, namespace string) *ImagePullError

CheckInferenceServicePodImagePullStatus checks if an InferenceService's pods are stuck in ImagePullBackOff or ErrImagePull state. It looks for pods with the isvc.serving.kserve.io/inferenceservice label matching the InferenceService name. Returns the image pull error details if found, or nil otherwise.

type ImagePullErrorType ¶

type ImagePullErrorType string

ImagePullErrorType categorizes image pull errors

const (
	ImagePullErrorAuth     ImagePullErrorType = "auth"
	ImagePullErrorNotFound ImagePullErrorType = "not-found"
	ImagePullErrorGeneric  ImagePullErrorType = "generic"
)

type ImageRegistryError ¶

type ImageRegistryError struct {
	Type    ImagePullErrorType // From template.go
	Message string
	Cause   error
}

ImageRegistryError wraps registry access errors with categorization

func (*ImageRegistryError) Error ¶

func (e *ImageRegistryError) Error() string

func (*ImageRegistryError) Unwrap ¶

func (e *ImageRegistryError) Unwrap() error

type MetadataFormatError ¶

type MetadataFormatError struct {
	Reason  string
	Message string
}

MetadataFormatError indicates the image metadata is malformed and cannot be processed.

func (*MetadataFormatError) Error ¶

func (e *MetadataFormatError) Error() string

type ModelReference ¶

type ModelReference struct {
	Name  string
	Scope TemplateScope
}

ModelReference represents a found model

type ParsedDiscovery ¶

type ParsedDiscovery struct {
	ModelSources []aimv1alpha1.AIMModelSource
	Profile      *aimv1alpha1.AIMProfile
}

ParsedDiscovery holds the parsed discovery result

func ParseDiscoveryLogs ¶

func ParseDiscoveryLogs(ctx context.Context, k8sClient client.Client, clientset kubernetes.Interface, job *batchv1.Job) (*ParsedDiscovery, error)

ParseDiscoveryLogs parses the discovery job output to extract model sources and profile. Reads pod logs from the completed job and parses the JSON output.

type RuntimeConfigResolution ¶

type RuntimeConfigResolution struct {
	// Name is the runtime config name requested by the consumer.
	Name string

	// Namespace is the consumer namespace used when searching for AIMRuntimeConfig.
	Namespace string

	ClusterConfig           *aimv1alpha1.AIMClusterRuntimeConfig
	NamespaceConfig         *aimv1alpha1.AIMRuntimeConfig
	ClusterConfigNotFound   bool
	NamespaceConfigNotFound bool

	EffectiveSpec aimv1alpha1.AIMRuntimeConfigSpec
	ResolvedRef   *aimv1alpha1.AIMResolvedRuntimeConfig
}

RuntimeConfigResolution captures the resolved runtime configuration. When both namespace and cluster configs exist, they are merged with namespace config taking precedence.

func ResolveRuntimeConfig ¶

func ResolveRuntimeConfig(ctx context.Context, k8sClient client.Client, namespace, configName string) (*RuntimeConfigResolution, error)

ResolveRuntimeConfig resolves runtime config with field-level merging. When both cluster and namespace configs exist, cluster config is used as base and namespace config fields override/merge on top. When configName is empty, the default runtime config name is used.

type RuntimeObservation ¶

type RuntimeObservation[R client.Object] struct {
	Runtime R
	TemplateObservation
}

RuntimeObservation combines TemplateObservation with a controller-specific runtime object.

func ObserveTemplate ¶

func ObserveTemplate[R client.Object](ctx context.Context, opts TemplateObservationOptions[R]) (*RuntimeObservation[R], error)

ObserveTemplate gathers runtime, discovery job, image, and runtime config information with common error handling.

type SelectionDiagnostics ¶

type SelectionDiagnostics struct {
	TotalCandidates                  int
	AfterAvailabilityFilter          int
	AfterUnoptimizedFilter           int
	AfterOverridesFilter             int
	AfterGPUAvailabilityFilter       int
	UnoptimizedTemplatesWereFiltered bool
}

SelectionDiagnostics provides detailed information about why template selection failed.

type ServiceObservation ¶

type ServiceObservation struct {
	InferenceService              *servingv1beta1.InferenceService
	TemplateName                  string
	BaseTemplateName              string
	Scope                         TemplateScope
	AutoSelectedTemplate          bool
	TemplateAvailable             bool
	TemplateOwnedByService        bool
	ShouldCreateTemplate          bool
	RuntimeConfigSpec             aimv1alpha1.AIMRuntimeConfigSpec
	ResolvedRuntimeConfig         *aimv1alpha1.AIMResolvedRuntimeConfig
	ResolvedImage                 *aimv1alpha1.AIMResolvedReference
	RoutePath                     string
	RouteTimeout                  *string
	PathTemplateErr               error
	RuntimeConfigErr              error
	ImageErr                      error
	ModelResolutionErr            error
	TemplateStatus                *aimv1alpha1.AIMServiceTemplateStatus
	TemplateSpecCommon            aimv1alpha1.AIMServiceTemplateSpecCommon
	TemplateSpec                  *aimv1alpha1.AIMServiceTemplateSpec
	TemplateNamespace             string
	ImageResources                *corev1.ResourceRequirements
	TemplateSelectionReason       string
	TemplateSelectionMessage      string
	TemplateSelectionCount        int
	TemplatesExistButNotReady     bool // True when templates exist but aren't Available yet
	ImageReady                    bool
	ImageReadyReason              string
	ImageReadyMessage             string
	InferenceServicePodImageError *ImagePullError // Categorized image pull error from InferenceService pods
	TemplateMatchingResults       []aimv1alpha1.AIMTemplateCandidateResult
	TemplateCache                 *aimv1alpha1.AIMTemplateCache
	ModelCaches                   *aimv1alpha1.AIMModelCacheList
	KVCache                       *aimv1alpha1.AIMKVCache // Observed AIMKVCache resource
	KVCacheConfigMap              *corev1.ConfigMap       // ConfigMap with KV cache configuration
	KVCacheErr                    error                   // Error from observing KV cache resources
}

ServiceObservation holds observed state for an AIMService reconciliation.

func (*ServiceObservation) RuntimeName ¶

func (o *ServiceObservation) RuntimeName() string

RuntimeName returns the effective runtime name for the service.

func (*ServiceObservation) TemplateFound ¶

func (o *ServiceObservation) TemplateFound() bool

TemplateFound returns true if a template was resolved (namespace or cluster scope).

type TemplateCandidate ¶

type TemplateCandidate struct {
	Name      string
	Namespace string
	Scope     TemplateScope
	Spec      aimv1alpha1.AIMServiceTemplateSpecCommon
	Status    aimv1alpha1.AIMServiceTemplateStatus
}

TemplateCandidate captures the information needed to evaluate a template during selection.

func (TemplateCandidate) QualifiedName ¶

func (c TemplateCandidate) QualifiedName() string

QualifiedName returns a human-readable identifier for logging/debugging.

type TemplateObservation ¶

type TemplateObservation struct {
	Job                *batchv1.Job
	Image              string
	ImageResources     *corev1.ResourceRequirements
	ImagePullSecrets   []corev1.LocalObjectReference
	ServiceAccountName string
	RuntimeConfig      *RuntimeConfigResolution
	TemplateCaches     *aimv1alpha1.AIMTemplateCacheList
	GPUModel           string
	GPUAvailable       bool
	GPUChecked         bool
	JobPodImageError   *ImagePullError // Categorized image pull error if job pod is stuck
}

TemplateObservation holds the common observed state for both template types

type TemplateObservationOptions ¶

type TemplateObservationOptions[R client.Object] struct {
	K8sClient               client.Client // Required for pod status checking
	GetRuntime              func(ctx context.Context) (R, error)
	ShouldCheckDiscoveryJob bool
	GetDiscoveryJob         func(ctx context.Context) (*batchv1.Job, error)
	GetJobNamespace         func() string // Namespace where the job runs (for pod lookup)
	LookupImage             func(ctx context.Context) (*ImageLookupResult, error)
	ResolveRuntimeConfig    func(ctx context.Context) (*RuntimeConfigResolution, error)
	OnRuntimeConfigResolved func(resolution *RuntimeConfigResolution)
	GetImagePullSecrets     func() []corev1.LocalObjectReference // Template's imagePullSecrets
	GetServiceAccountName   func() string                        // Template's serviceAccountName
	GetTemplateCaches       func(ctx context.Context) (*aimv1alpha1.AIMTemplateCacheList, error)
}

TemplateObservationOptions configures ObserveTemplate behaviour.

type TemplatePlanBuilders ¶

type TemplatePlanBuilders struct {
	BuildRuntime      func(input TemplatePlanInput) client.Object
	BuildDiscoveryJob func(input TemplatePlanInput) client.Object
}

TemplatePlanBuilders specifies how to render runtime and discovery job objects.

type TemplatePlanContext ¶

type TemplatePlanContext struct {
	Ctx         context.Context
	Client      client.Client
	Template    metav1.Object
	APIVersion  string
	Kind        string
	Status      aimv1alpha1.AIMTemplateStatusEnum
	Observation *TemplateObservation
}

TemplatePlanContext provides metadata needed during plan generation.

type TemplatePlanInput ¶

type TemplatePlanInput struct {
	Observation       *TemplateObservation
	RuntimeConfigSpec aimv1alpha1.AIMRuntimeConfigSpec
	OwnerReference    metav1.OwnerReference
}

TemplatePlanInput supplies builders with convenient access to observation data.

type TemplateResolution ¶

type TemplateResolution struct {
	BaseName  string
	FinalName string
	Derived   bool
	Scope     TemplateScope
}

TemplateResolution captures the result of resolving a template name for a service.

type TemplateScope ¶

type TemplateScope string

TemplateScope indicates whether a template is namespace-scoped, cluster-scoped, or unresolved.

const (
	TemplateScopeNone      TemplateScope = ""
	TemplateScopeNamespace TemplateScope = "namespace"
	TemplateScopeCluster   TemplateScope = "cluster"
)

func LoadBaseTemplateSpec ¶

func LoadBaseTemplateSpec(ctx context.Context, k8sClient client.Client, service *aimv1alpha1.AIMService, baseName string) (*aimv1alpha1.AIMServiceTemplateSpec, TemplateScope, error)

LoadBaseTemplateSpec fetches the base template spec for a derived template. Searches namespace-scoped templates first, then falls back to cluster-scoped templates.

func ResolveOrCreateModelFromImage ¶

func ResolveOrCreateModelFromImage(
	ctx context.Context,
	k8sClient client.Client,
	serviceNamespace string,
	imageURI string,
	runtimeConfig *aimv1alpha1.AIMRuntimeConfigSpec,
	imagePullSecrets []corev1.LocalObjectReference,
	serviceAccountName string,
	parentService *aimv1alpha1.AIMService,
) (modelName string, scope TemplateScope, err error)

ResolveOrCreateModelFromImage searches for existing models matching the image URI, or creates a new one if none exists. Returns the model name and scope.

type TemplateSelectionStatus ¶

type TemplateSelectionStatus struct {
	AutoSelected              bool
	CandidateCount            int
	SelectionReason           string
	SelectionMessage          string
	TemplatesExistButNotReady bool
	ImageReady                bool
	ImageReadyReason          string
	ImageReadyMessage         string
	ModelResolutionErr        error
	TemplateMatchingResults   []aimv1alpha1.AIMTemplateCandidateResult
}

TemplateSelectionStatus captures metadata about automatic template selection.

type TemplateSpec ¶

type TemplateSpec interface {
	GetModelName() string
	GetSpecModelSources() []aimv1alpha1.AIMModelSource
}

TemplateSpec provides the common template specification

type TemplateWithStatus ¶

type TemplateWithStatus interface {
	TemplateSpec
	client.Object
	GetStatus() *aimv1alpha1.AIMServiceTemplateStatus
}

TemplateWithStatus extends TemplateSpec with status access

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL