Documentation
¶
Index ¶
Constants ¶
View Source
const (
	// DefaultRuntimeConfigName is the name of the default AIM runtime config.
	DefaultRuntimeConfigName = "default"

	// MaxConcurrentDiscoveryJobs is the global limit for concurrent discovery
	// jobs across all namespaces.
	MaxConcurrentDiscoveryJobs = 10

	// AimLabelDomain is the base domain used for AIM-specific labels.
	AimLabelDomain = "aim.eai.amd.com"
)
View Source
const ( // LabelTemplate is the label key for the template name LabelTemplate = AimLabelDomain + "/template" // LabelService is the label key for the service name LabelService = AimLabelDomain + "/service" // LabelModelID is the label key for the model ID LabelModelID = AimLabelDomain + "/model" // LabelMetric is the label key for the optimization metric LabelMetric = AimLabelDomain + "/metric" // LabelPrecision is the label key for the numeric precision LabelPrecision = AimLabelDomain + "/precision" // LabelCacheType indicates the type of cache (temp or persistent) LabelCacheType = AimLabelDomain + "/cache-type" // LabelTemplateCacheName is the label key for the template cache name (used on artifacts) LabelTemplateCacheName = AimLabelDomain + "/template-cache.name" )
Label keys for AIM resources
View Source
// Label values.
const (
	// LabelValueManagedBy is the standard managed-by label value.
	LabelValueManagedBy = "aim-engine"

	// LabelValueCacheTypeTemp indicates a temporary cache.
	LabelValueCacheTypeTemp = "temp"

	// LabelValueCacheTypePersistent indicates a persistent cache.
	LabelValueCacheTypePersistent = "persistent"

	// LabelValueCacheTypeDedicated indicates a dedicated cache owned by an
	// AIMService. These caches are created for non-cached modes (Never/Auto)
	// to enable unified downloads.
	LabelValueCacheTypeDedicated = "dedicated"
)
Label values
View Source
// Discovery circuit breaker configuration.
const (
	// DiscoveryBaseBackoffSeconds is the base backoff duration in seconds.
	// Actual backoff = base * 2^(attempts-1), capped at DiscoveryMaxBackoffSeconds.
	DiscoveryBaseBackoffSeconds = 60 // 1 minute

	// DiscoveryMaxBackoffSeconds is the maximum backoff duration in seconds.
	DiscoveryMaxBackoffSeconds = 3600 // 1 hour
)
Discovery circuit breaker configuration
View Source
// Shared condition reasons used across multiple resource types.
const (
	// Image-related reasons (used by AIMModel, AIMService, AIMServiceTemplate).
	ReasonImagePullAuthFailure = "ImagePullAuthFailure"
	ReasonImageNotFound        = "ImageNotFound"
	ReasonImagePullBackOff     = "ImagePullBackOff"

	// Resource resolution/reference reasons (used by multiple types).
	ReasonNotFound = "NotFound"
	ReasonNotReady = "NotReady"
	ReasonCreating = "Creating"
	ReasonResolved = "Resolved"

	// Storage/PVC reasons (used by AIMArtifact, AIMService).
	ReasonPVCProvisioning = "PVCProvisioning"
	ReasonPVCBound        = "PVCBound"
	ReasonPVCNotBound     = "PVCNotBound"
	ReasonPVCPending      = "PVCPending"
	ReasonPVCLost         = "PVCLost"

	// Generic failure/retry reasons.
	ReasonRetryBackoff = "RetryBackoff"
	ReasonFailed       = "Failed"
)
Shared condition reasons used across multiple resource types
View Source
// AMD GPU node label keys.
const (
	// NodeLabelAMDGPUDeviceID is the primary node label for AMD GPU device IDs
	// (e.g., "74a1" for MI300X).
	NodeLabelAMDGPUDeviceID = "amd.com/gpu.device-id"

	// NodeLabelBetaAMDGPUDeviceID is the legacy/beta node label for AMD GPU
	// device IDs.
	NodeLabelBetaAMDGPUDeviceID = "beta.amd.com/gpu.device-id"
)
AMD GPU node label keys
View Source
// Standard Kubernetes label keys.
const (
	// LabelK8sComponent is the standard Kubernetes component label.
	LabelK8sComponent = "app.kubernetes.io/component"

	// LabelK8sManagedBy is the standard Kubernetes managed-by label.
	LabelK8sManagedBy = "app.kubernetes.io/managed-by"
)
Standard Kubernetes label keys
View Source
// InferenceService constants.
const (
	// ContainerKServe is the name of the main inference container.
	ContainerKServe = "kserve-container"

	// VolumeSharedMemory is presumably the name of the shared-memory volume
	// (undocumented in the original; confirm against the code that uses it).
	VolumeSharedMemory = "dshm"

	// VolumeModelStorage is the name of the model storage volume.
	VolumeModelStorage = "model-storage"

	// MountPathSharedMemory is presumably the container path at which the
	// shared-memory volume is mounted (undocumented in the original).
	MountPathSharedMemory = "/dev/shm"

	// DefaultSharedMemorySize is presumably the default size of the
	// shared-memory volume, expressed as a quantity string (undocumented in
	// the original).
	DefaultSharedMemorySize = "8Gi"

	// DefaultHTTPPort is the default HTTP port for inference services.
	DefaultHTTPPort = 8000

	// DefaultGatewayPort is the default gateway port.
	DefaultGatewayPort = 80

	// DefaultGPUResourceName is the default resource name for AMD GPUs.
	DefaultGPUResourceName = "amd.com/gpu"

	// AIMCacheBasePath is the base directory for cached models.
	AIMCacheBasePath = "/workspace/cache"
)
InferenceService constants
View Source
// Component values for resource labels.
const (
	// ComponentInference is the component value for inference-related resources.
	ComponentInference = "inference"

	// ComponentRouting is the component value for routing-related resources.
	ComponentRouting = "routing"

	// ComponentModelStorage is the component value for storage-related resources.
	ComponentModelStorage = "model-storage"
)
Component values for resource labels
View Source
// Environment variable names.
const (
	// EnvAIMCachePath is the environment variable for the cache path.
	EnvAIMCachePath = "AIM_CACHE_PATH"

	// EnvAIMMetric is the environment variable for the optimization metric.
	EnvAIMMetric = "AIM_METRIC"

	// EnvAIMPrecision is the environment variable for the numeric precision.
	EnvAIMPrecision = "AIM_PRECISION"

	// EnvAIMProfileID is the environment variable for the profile ID.
	EnvAIMProfileID = "AIM_PROFILE_ID"

	// EnvVLLMEnableMetrics enables vLLM metrics.
	EnvVLLMEnableMetrics = "VLLM_ENABLE_METRICS"

	// EnvAIMModelID is presumably the environment variable for the model ID
	// (undocumented in the original; confirm against the code that sets it).
	EnvAIMModelID = "AIM_MODEL_ID"
)
Environment variable names
View Source
// KServe annotation and label keys.
const (
	// AnnotationKServeAutoscalerClass is the annotation key for autoscaler class.
	AnnotationKServeAutoscalerClass = "serving.kserve.io/autoscalerClass"

	// AutoscalerClassNone disables autoscaling.
	AutoscalerClassNone = "none"

	// AutoscalerClassKeda enables KEDA-based autoscaling.
	AutoscalerClassKeda = "keda"

	// LabelKServeInferenceService is the label key used by KServe on
	// predictor pods.
	LabelKServeInferenceService = "serving.kserve.io/inferenceservice"

	// AnnotationOTelSidecarInject is the annotation for OpenTelemetry sidecar
	// injection.
	AnnotationOTelSidecarInject = "sidecar.opentelemetry.io/inject"

	// AnnotationPrometheusPort is the annotation for Prometheus metrics port.
	AnnotationPrometheusPort = "prometheus.kserve.io/port"

	// DefaultPrometheusPort is the default port for vLLM metrics. It is a
	// string rather than an integer constant.
	DefaultPrometheusPort = "8000"
)
KServe annotation and label keys
View Source
// Template-related constants.
const (
	// TemplateNameMaxLength is the maximum length for template names
	// (Kubernetes name limit).
	TemplateNameMaxLength = 63

	// DerivedTemplateSuffix is the suffix used for derived templates.
	DerivedTemplateSuffix = "-ovr-"

	// PredictorServiceSuffix is the suffix added to InferenceService names for
	// predictor services.
	PredictorServiceSuffix = "-predictor"
)
Template-related constants
View Source
const ( // LabelKeyModel identifies the owning AIM(Cluster)Model name. // Used on: AIM(Cluster)ServiceTemplate, AIMService, discovery Jobs LabelKeyModel = AimLabelDomain + "/model" // LabelKeyTemplate identifies the owning AIM(Cluster)ServiceTemplate name. // Used on: AIMService, inference Pods LabelKeyTemplate = AimLabelDomain + "/template" // LabelKeyService identifies the owning AIMService name. // Used on: inference Pods, PVCs LabelKeyService = AimLabelDomain + "/service" // LabelKeyOrigin indicates how a resource was created. // Values: auto-generated, derived, manual LabelKeyOrigin = AimLabelDomain + "/origin" // LabelKeyManagedBy indicates what tool/controller manages this resource. LabelKeyManagedBy = AimLabelDomain + "/managed-by" // LabelKeyComponent identifies the role of this resource in the architecture. // Values: inference, discovery, cache LabelKeyComponent = AimLabelDomain + "/component" // LabelKeyCustomModel indicates this is a custom model with inline model sources. // Value: "true" LabelKeyCustomModel = AimLabelDomain + "/custom-model" // LabelKeyTemplateAlias is the user-provided short-hand alias for a custom template. // Used to find templates by their alias before model prefix and hash are added. LabelKeyTemplateAlias = AimLabelDomain + "/template.alias" // LabelKeyGPUModel is the GPU model for this template (e.g., MI300X, MI325X). LabelKeyGPUModel = AimLabelDomain + "/gpu.model" // LabelKeyGPUCount is the number of GPUs for this template. LabelKeyGPUCount = AimLabelDomain + "/gpu.count" // LabelKeyTemplateMetric is the optimization metric (latency, throughput). LabelKeyTemplateMetric = AimLabelDomain + "/template.metric" // LabelKeyTemplatePrecision is the precision (fp8, fp16, bf16). LabelKeyTemplatePrecision = AimLabelDomain + "/template.precision" // LabelKeyCacheType identifies the type of cache. 
// Values: artifact, template-cache LabelKeyCacheType = AimLabelDomain + "/cache.type" // LabelKeyCacheName identifies the cache resource name. LabelKeyCacheName = AimLabelDomain + "/cache.name" // LabelKeyModelSource identifies the source of the model (e.g., huggingface, s3). LabelKeyModelSource = AimLabelDomain + "/model.source" // LabelValueOriginAutoGenerated indicates the resource was auto-generated by the controller. LabelValueOriginAutoGenerated = "auto-generated" // LabelValueOriginDerived indicates the resource was derived from another resource. LabelValueOriginDerived = "derived" // LabelValueOriginManual indicates the resource was manually created by a user. LabelValueOriginManual = "manual" // LabelValueManagedByController indicates the resource is managed by the AIM controller. LabelValueManagedByController = "aim-controller" // LabelValueComponentInference indicates an inference-related resource. LabelValueComponentInference = "inference" // LabelValueComponentDiscovery indicates a discovery-related resource. LabelValueComponentDiscovery = "discovery" // LabelValueComponentCache indicates a cache-related resource. LabelValueComponentCache = "cache" // LabelValueCacheTypeModel indicates a artifact. LabelValueCacheTypeModel = "artifact" // LabelValueCacheTypeTemplate indicates a template cache. LabelValueCacheTypeTemplate = "template-cache" LabelValueCacheTypeTemplateCache = "template-cache" LabelKeySourceModel = AimLabelDomain + "/source-model" )
View Source
const ( // AnnotationReconciliationPaused, when set to "true", pauses reconciliation for the resource. // The controller will skip all reconciliation logic and return immediately. // This is useful for testing or debugging purposes. AnnotationReconciliationPaused = AimLabelDomain + "/reconciliation-paused" )
AIM annotation keys
Variables ¶
View Source
// AIMStatusPriority maps AIMStatus values to priority levels. Higher values
// indicate more desirable statuses for sorting and filtering.
var AIMStatusPriority = map[AIMStatus]int{
	AIMStatusRunning:      7,
	AIMStatusReady:        6,
	AIMStatusProgressing:  5,
	AIMStatusStarting:     4,
	AIMStatusPending:      3,
	AIMStatusDegraded:     2,
	AIMStatusNotAvailable: 1,
	AIMStatusFailed:       0,
}
AIMStatusPriority maps AIMStatus values to priority levels. Higher values indicate more desirable statuses for sorting and filtering.
Functions ¶
func CompareAIMStatus ¶
func GetOperatorNamespace ¶
func GetOperatorNamespace() string
GetOperatorNamespace returns the namespace where the AIM operator runs. The result is cached after the first call.
Types ¶
type AIMStatus ¶
// AIMStatus is a string-typed status value for AIM resources. The recognised
// values are the AIMStatus* constants declared below.
type AIMStatus string

// Recognised AIMStatus values. Each constant's string form equals the suffix
// of its name.
const (
	AIMStatusPending      AIMStatus = "Pending"
	AIMStatusStarting     AIMStatus = "Starting"
	AIMStatusProgressing  AIMStatus = "Progressing"
	AIMStatusReady        AIMStatus = "Ready"
	AIMStatusRunning      AIMStatus = "Running"
	AIMStatusDegraded     AIMStatus = "Degraded"
	AIMStatusNotAvailable AIMStatus = "NotAvailable"
	AIMStatusFailed       AIMStatus = "Failed"
)
type StatusProvider ¶
// StatusProvider is implemented by status types that expose their AIMStatus.
type StatusProvider interface {
// GetAIMStatus returns the AIMStatus of the implementing value.
GetAIMStatus() AIMStatus
}
StatusProvider is implemented by status types that expose their AIMStatus.
Click to show internal directories.
Click to hide internal directories.