constants

package
v0.2.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 24, 2026 | License: MIT | Imports: 3 | Imported by: 0

Documentation

Index

Constants

View Source
// Package-wide defaults plus the domain from which AIM label and annotation keys are built.
const (
	// AimLabelDomain is the domain prefix shared by every AIM-specific label key.
	AimLabelDomain = "aim.eai.amd.com"

	// DefaultRuntimeConfigName names the AIM runtime config that is treated as the default.
	DefaultRuntimeConfigName = "default"

	// MaxConcurrentDiscoveryJobs caps how many discovery jobs may run at the same time.
	// The limit is global: it is counted across all namespaces.
	MaxConcurrentDiscoveryJobs = 10
)
View Source
const (
	// LabelTemplate keys the label that carries the template name.
	LabelTemplate = AimLabelDomain + "/template"
	// LabelService keys the label that carries the service name.
	LabelService = AimLabelDomain + "/service"
	// LabelModelID keys the label that carries the model ID.
	LabelModelID = AimLabelDomain + "/model"
	// LabelMetric keys the label that carries the optimization metric.
	LabelMetric = AimLabelDomain + "/metric"
	// LabelPrecision keys the label that carries the numeric precision.
	LabelPrecision = AimLabelDomain + "/precision"
	// LabelCacheType keys the label that states the kind of cache (temp or persistent).
	LabelCacheType = AimLabelDomain + "/cache-type"
	// LabelTemplateCacheName keys the label that carries the template cache name.
	// It is applied to artifacts.
	LabelTemplateCacheName = AimLabelDomain + "/template-cache.name"
)

Label keys for AIM resources

View Source
const (
	// LabelValueManagedBy is the value used for the standard managed-by label.
	LabelValueManagedBy = "aim-engine"
	// LabelValueCacheTypeTemp marks a cache as temporary.
	LabelValueCacheTypeTemp = "temp"
	// LabelValueCacheTypePersistent marks a cache as persistent.
	LabelValueCacheTypePersistent = "persistent"
	// LabelValueCacheTypeDedicated marks a cache as dedicated, meaning it is owned by an AIMService.
	// Dedicated caches are created for the non-cached modes (Never/Auto) so that
	// downloads go through a unified path.
	LabelValueCacheTypeDedicated = "dedicated"
)

Label values

View Source
const (
	// DiscoveryBaseBackoffSeconds is the starting backoff, expressed in seconds.
	// The effective backoff is base * 2^(attempts-1) and never exceeds
	// DiscoveryMaxBackoffSeconds.
	DiscoveryBaseBackoffSeconds = 60 // one minute

	// DiscoveryMaxBackoffSeconds is the upper bound on the backoff, expressed in seconds.
	DiscoveryMaxBackoffSeconds = 3600 // one hour
)

Discovery circuit breaker configuration

View Source
const (
	// Reasons about container images
	// (shared by AIMModel, AIMService and AIMServiceTemplate).
	ReasonImagePullAuthFailure = "ImagePullAuthFailure"
	ReasonImageNotFound        = "ImageNotFound"
	ReasonImagePullBackOff     = "ImagePullBackOff"

	// Reasons about resolving or referencing other resources
	// (shared by several resource types).
	ReasonNotFound = "NotFound"
	ReasonNotReady = "NotReady"
	ReasonCreating = "Creating"
	ReasonResolved = "Resolved"

	// Reasons about storage and PVCs
	// (shared by AIMArtifact and AIMService).
	ReasonPVCProvisioning = "PVCProvisioning"
	ReasonPVCBound        = "PVCBound"
	ReasonPVCNotBound     = "PVCNotBound"
	ReasonPVCPending      = "PVCPending"
	ReasonPVCLost         = "PVCLost"

	// Catch-all reasons for failure and retry.
	ReasonRetryBackoff = "RetryBackoff"
	ReasonFailed       = "Failed"
)

Shared condition reasons used across multiple resource types

View Source
const (
	// NodeLabelAMDGPUDeviceID is the primary node label key holding the AMD GPU
	// device ID (for example "74a1" for MI300X).
	NodeLabelAMDGPUDeviceID = "amd.com/gpu.device-id"

	// NodeLabelBetaAMDGPUDeviceID is the legacy/beta node label key holding the
	// AMD GPU device ID.
	NodeLabelBetaAMDGPUDeviceID = "beta.amd.com/gpu.device-id"
)

AMD GPU node label keys

View Source
const (
	// LabelK8sComponent is the key of the standard Kubernetes component label.
	LabelK8sComponent = "app.kubernetes.io/component"
	// LabelK8sManagedBy is the key of the standard Kubernetes managed-by label.
	LabelK8sManagedBy = "app.kubernetes.io/managed-by"
)

Standard Kubernetes label keys

View Source
const (
	// ContainerKServe names the main inference container.
	ContainerKServe = "kserve-container"
	// VolumeSharedMemory names the volume used for shared memory.
	VolumeSharedMemory = "dshm"
	// VolumeModelStorage names the volume used for model storage.
	VolumeModelStorage = "model-storage"
	// MountPathSharedMemory is where the shared memory volume is mounted.
	MountPathSharedMemory = "/dev/shm"
	// DefaultSharedMemorySize is the size given to /dev/shm by default.
	DefaultSharedMemorySize = "8Gi"
	// DefaultHTTPPort is the HTTP port inference services use by default.
	DefaultHTTPPort = 8000
	// DefaultGatewayPort is the gateway port used by default.
	DefaultGatewayPort = 80
	// DefaultGPUResourceName is the resource name used for AMD GPUs by default.
	DefaultGPUResourceName = "amd.com/gpu"
	// AIMCacheBasePath is the base directory under which cached models live.
	AIMCacheBasePath = "/workspace/cache"
)

InferenceService constants

View Source
const (
	// ComponentInference is the component value applied to inference-related resources.
	ComponentInference = "inference"
	// ComponentRouting is the component value applied to routing-related resources.
	ComponentRouting = "routing"
	// ComponentModelStorage is the component value applied to storage-related resources.
	ComponentModelStorage = "model-storage"
)

Component values for resource labels

View Source
const (
	// EnvAIMCachePath names the environment variable holding the cache path.
	EnvAIMCachePath = "AIM_CACHE_PATH"
	// EnvAIMMetric names the environment variable holding the optimization metric.
	EnvAIMMetric = "AIM_METRIC"
	// EnvAIMPrecision names the environment variable holding the numeric precision.
	EnvAIMPrecision = "AIM_PRECISION"
	// EnvAIMProfileID names the environment variable holding the profile ID.
	EnvAIMProfileID = "AIM_PROFILE_ID"
	// EnvVLLMEnableMetrics names the environment variable that turns on vLLM metrics.
	EnvVLLMEnableMetrics = "VLLM_ENABLE_METRICS"

	// EnvAIMModelID names the AIM_MODEL_ID environment variable.
	// NOTE(review): presumably carries the model ID; confirm against its consumers.
	EnvAIMModelID = "AIM_MODEL_ID"
)

Environment variable names

View Source
const (
	// AnnotationKServeAutoscalerClass is the annotation key that selects the autoscaler class.
	AnnotationKServeAutoscalerClass = "serving.kserve.io/autoscalerClass"
	// AutoscalerClassNone is the autoscaler class value that turns autoscaling off.
	AutoscalerClassNone = "none"
	// AutoscalerClassKeda is the autoscaler class value that turns on KEDA-based autoscaling.
	AutoscalerClassKeda = "keda"
	// LabelKServeInferenceService is the label key KServe puts on predictor pods.
	LabelKServeInferenceService = "serving.kserve.io/inferenceservice"
	// AnnotationOTelSidecarInject is the annotation key controlling OpenTelemetry sidecar injection.
	AnnotationOTelSidecarInject = "sidecar.opentelemetry.io/inject"
	// AnnotationPrometheusPort is the annotation key giving the Prometheus metrics port.
	AnnotationPrometheusPort = "prometheus.kserve.io/port"
	// DefaultPrometheusPort is the port used for vLLM metrics by default.
	// It is a string, unlike the integer DefaultHTTPPort.
	DefaultPrometheusPort = "8000"
)

KServe annotation and label keys

View Source
const (
	// TemplateNameMaxLength is the longest allowed template name,
	// matching the Kubernetes name limit.
	TemplateNameMaxLength = 63
	// DerivedTemplateSuffix is the suffix used when naming derived templates.
	DerivedTemplateSuffix = "-ovr-"
	// PredictorServiceSuffix is appended to an InferenceService name to form
	// the name of its predictor service.
	PredictorServiceSuffix = "-predictor"
)

Template-related constants

View Source
// Label keys under AimLabelDomain, followed by the values used with some of them.
const (

	// LabelKeyModel records the name of the owning AIM(Cluster)Model.
	// Applied to: AIM(Cluster)ServiceTemplate, AIMService, discovery Jobs.
	LabelKeyModel = AimLabelDomain + "/model"

	// LabelKeyTemplate records the name of the owning AIM(Cluster)ServiceTemplate.
	// Applied to: AIMService, inference Pods.
	LabelKeyTemplate = AimLabelDomain + "/template"

	// LabelKeyService records the name of the owning AIMService.
	// Applied to: inference Pods, PVCs.
	LabelKeyService = AimLabelDomain + "/service"

	// LabelKeyOrigin records how a resource came to exist.
	// Possible values: auto-generated, derived, manual.
	LabelKeyOrigin = AimLabelDomain + "/origin"

	// LabelKeyManagedBy records which tool or controller manages the resource.
	LabelKeyManagedBy = AimLabelDomain + "/managed-by"

	// LabelKeyComponent records the role the resource plays in the architecture.
	// Possible values: inference, discovery, cache.
	LabelKeyComponent = AimLabelDomain + "/component"

	// LabelKeyCustomModel flags a custom model that has inline model sources.
	// Value: "true".
	LabelKeyCustomModel = AimLabelDomain + "/custom-model"

	// LabelKeyTemplateAlias holds the short-hand alias a user gave a custom template.
	// It allows looking a template up by alias, before the model prefix and hash are added.
	LabelKeyTemplateAlias = AimLabelDomain + "/template.alias"

	// LabelKeyGPUModel holds the GPU model of the template (e.g., MI300X, MI325X).
	LabelKeyGPUModel = AimLabelDomain + "/gpu.model"

	// LabelKeyGPUCount holds the number of GPUs of the template.
	LabelKeyGPUCount = AimLabelDomain + "/gpu.count"

	// LabelKeyTemplateMetric holds the optimization metric (latency, throughput).
	LabelKeyTemplateMetric = AimLabelDomain + "/template.metric"

	// LabelKeyTemplatePrecision holds the precision (fp8, fp16, bf16).
	LabelKeyTemplatePrecision = AimLabelDomain + "/template.precision"

	// LabelKeyCacheType records what kind of cache the resource is.
	// Possible values: artifact, template-cache.
	LabelKeyCacheType = AimLabelDomain + "/cache.type"

	// LabelKeyCacheName records the name of the cache resource.
	LabelKeyCacheName = AimLabelDomain + "/cache.name"

	// LabelKeyModelSource records where the model comes from (e.g., huggingface, s3).
	LabelKeyModelSource = AimLabelDomain + "/model.source"

	// LabelValueOriginAutoGenerated means the controller generated the resource automatically.
	LabelValueOriginAutoGenerated = "auto-generated"

	// LabelValueOriginDerived means the resource was derived from another resource.
	LabelValueOriginDerived = "derived"

	// LabelValueOriginManual means a user created the resource by hand.
	LabelValueOriginManual = "manual"

	// LabelValueManagedByController means the AIM controller manages the resource.
	LabelValueManagedByController = "aim-controller"

	// LabelValueComponentInference marks an inference-related resource.
	LabelValueComponentInference = "inference"

	// LabelValueComponentDiscovery marks a discovery-related resource.
	LabelValueComponentDiscovery = "discovery"

	// LabelValueComponentCache marks a cache-related resource.
	LabelValueComponentCache = "cache"

	// LabelValueCacheTypeModel marks an artifact.
	LabelValueCacheTypeModel = "artifact"

	// LabelValueCacheTypeTemplate marks a template cache.
	LabelValueCacheTypeTemplate = "template-cache"

	// LabelValueCacheTypeTemplateCache has the same value as LabelValueCacheTypeTemplate.
	// NOTE(review): looks like a duplicate name for the same value; confirm which one callers should prefer.
	LabelValueCacheTypeTemplateCache = "template-cache"
	// LabelKeySourceModel is the "/source-model" label key under AimLabelDomain.
	// NOTE(review): presumably names the model a resource was sourced from; confirm against usage.
	LabelKeySourceModel = AimLabelDomain + "/source-model"
)
View Source
const (
	// AnnotationReconciliationPaused pauses reconciliation of a resource when its value is "true".
	// While it is set, the controller skips all reconciliation logic and returns immediately.
	// Intended for testing and debugging.
	AnnotationReconciliationPaused = AimLabelDomain + "/reconciliation-paused"
)

AIM annotation keys

Variables

AIMStatusPriority maps AIMStatus values to priority levels. Higher values indicate more desirable statuses for sorting and filtering.

Functions

func CompareAIMStatus

func CompareAIMStatus(a AIMStatus, b AIMStatus) int

func GetOperatorNamespace

func GetOperatorNamespace() string

GetOperatorNamespace returns the namespace where the AIM operator runs. The result is cached after the first call.

Types

type AIMStatus

// AIMStatus is the string type used for AIM status values.
type AIMStatus string

// The defined AIMStatus values. Each constant's string equals its name
// without the "AIMStatus" prefix.
const (
	AIMStatusPending      AIMStatus = "Pending"
	AIMStatusStarting     AIMStatus = "Starting"
	AIMStatusProgressing  AIMStatus = "Progressing"
	AIMStatusReady        AIMStatus = "Ready"
	AIMStatusRunning      AIMStatus = "Running"
	AIMStatusDegraded     AIMStatus = "Degraded"
	AIMStatusNotAvailable AIMStatus = "NotAvailable"
	AIMStatusFailed       AIMStatus = "Failed"
)

type StatusProvider

// StatusProvider is satisfied by status types that can report their AIMStatus.
type StatusProvider interface {
	// GetAIMStatus returns the AIMStatus of the implementing value.
	GetAIMStatus() AIMStatus
}

StatusProvider is implemented by status types that expose their AIMStatus.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL