Documentation
¶
Index ¶
- Constants
- func ApplyControllerLabelsToResult(planResult *PlanResult, labels map[string]string)
- func ApplyDesiredState(ctx context.Context, k8sClient client.Client, fieldOwner string, ...) error
- func DeriveStateFromErrors(errs []error) constants.AIMStatus
- func EmitConditionLogs(ctx context.Context, transitions []ConditionTransition, ...)
- func EmitConditionTransitions(recorder record.EventRecorder, obj runtime.Object, ...)
- func EmitRecurringEvents(recorder record.EventRecorder, obj runtime.Object, manager *ConditionManager)
- func EmitRecurringLogs(ctx context.Context, manager *ConditionManager)
- func IsReconciliationPaused(obj client.Object) bool
- func IsStateEngineError(err error) bool
- func MergeRuntimeConfigs(priority, base *aimv1alpha1.AIMRuntimeConfigCommon) *aimv1alpha1.AIMRuntimeConfigCommon
- func PropagateLabels(parent, child client.Object, config *aimv1alpha1.AIMRuntimeConfigCommon)
- func PropagateLabelsForResult(parent client.Object, planResult *PlanResult, ...)
- type ChildRef
- type ComponentHealth
- func GetHTTPRouteHealth(route *gatewayapiv1.HTTPRoute) ComponentHealth
- func GetJobHealth(job *batchv1.Job) ComponentHealth
- func GetPodsHealth(ctx context.Context, clientset kubernetes.Interface, podList *corev1.PodList) ComponentHealth
- func GetPvcHealth(pvc *corev1.PersistentVolumeClaim) ComponentHealth
- type ComponentHealthProvider
- type ConditionManager
- func (m *ConditionManager) AllConditionsTrue(conditionTypes ...string) bool
- func (m *ConditionManager) AnyConditionFalse(conditionTypes ...string) bool
- func (m *ConditionManager) AnyConditionTrue(conditionTypes ...string) bool
- func (m *ConditionManager) Conditions() []metav1.Condition
- func (m *ConditionManager) ConfigFor(condType string) ObservabilityConfig
- func (m *ConditionManager) Delete(condType string)
- func (m *ConditionManager) Get(condType string) *metav1.Condition
- func (m *ConditionManager) MarkFalse(condType, reason, message string, opts ...ObservabilityOption)
- func (m *ConditionManager) MarkTrue(condType, reason, message string, opts ...ObservabilityOption)
- func (m *ConditionManager) MarkUnknown(condType, reason, message string, opts ...ObservabilityOption)
- func (m *ConditionManager) Set(conditionType string, status metav1.ConditionStatus, reason, message string, ...)
- func (m *ConditionManager) SetCondition(cond metav1.Condition, opts ...ObservabilityOption)
- type ConditionTransition
- type ConfiguredCondition
- type DependencyType
- type DomainReconciler
- type ErrorCategory
- type EventLevel
- type EventMode
- type FetchResult
- func Fetch[T client.Object](ctx context.Context, c client.Client, key client.ObjectKey, obj T) FetchResult[T]
- func FetchList[T client.ObjectList](ctx context.Context, c client.Client, list T, opts ...client.ListOption) FetchResult[T]
- func FetchMergedRuntimeConfig(ctx context.Context, c client.Client, name, namespace string) FetchResult[*aimv1alpha1.AIMRuntimeConfigCommon]
- func (fr FetchResult[T]) HasError() bool
- func (fr FetchResult[T]) IsNotFound() bool
- func (fr FetchResult[T]) OK() bool
- func (fr FetchResult[T]) ToComponentHealth(component string, inspector func(T) ComponentHealth) ComponentHealth
- func (fr FetchResult[T]) ToComponentHealthWithContext(ctx context.Context, clientset kubernetes.Interface, component string, ...) ComponentHealth
- func (fr FetchResult[T]) ToDownstreamComponentHealth(component string, inspector func(T) ComponentHealth) ComponentHealth
- func (fr FetchResult[T]) ToUpstreamComponentHealth(component string, inspector func(T) ComponentHealth) ComponentHealth
- type InfrastructureError
- type LogMode
- type ManualStatusController
- type ObjectWithStatus
- type ObservabilityConfig
- type ObservabilityOption
- func AsError() ObservabilityOption
- func AsInfo() ObservabilityOption
- func AsWarning() ObservabilityOption
- func EventLevelToOption(level EventLevel) ObservabilityOption
- func Silent() ObservabilityOption
- func WithCriticalError() ObservabilityOption
- func WithDebugLog() ObservabilityOption
- func WithErrorLog() ObservabilityOption
- func WithEventMessage(msg string) ObservabilityOption
- func WithEventReason(reason string) ObservabilityOption
- func WithInfoLog() ObservabilityOption
- func WithLog(level int) ObservabilityOption
- func WithLogMessage(msg string) ObservabilityOption
- func WithNormalEvent() ObservabilityOption
- func WithRecurring() ObservabilityOption
- func WithRecurringErrorLog() ObservabilityOption
- func WithRecurringWarningEvent() ObservabilityOption
- func WithWarningEvent() ObservabilityOption
- type Pipeline
- type PlanResult
- func (pr *PlanResult) Apply(obj client.Object)
- func (pr *PlanResult) ApplyWithoutOwnerRef(obj client.Object)
- func (pr *PlanResult) Delete(obj client.Object)
- func (pr *PlanResult) GetToApply() []client.Object
- func (pr *PlanResult) GetToApplyWithoutOwnerRef() []client.Object
- func (pr *PlanResult) GetToDelete() []client.Object
- type ReconcileContext
- type RuntimeConfigRefProvider
- type StateEngineDecision
- type StateEngineError
- func CategorizeError(err error) StateEngineError
- func NewAuthError(reason, message string, cause error) StateEngineError
- func NewInfrastructureError(reason, message string, cause error) StateEngineError
- func NewInvalidSpecError(reason, message string, cause error) StateEngineError
- func NewMissingDownstreamDependencyError(reason, message string, cause error) StateEngineError
- func NewMissingUpstreamDependencyError(reason, message string, cause error) StateEngineError
- func NewResourceExhaustionError(reason, message string, cause error) StateEngineError
- type StatusDecorator
- type StatusHelper
- type StatusWithConditions
Constants ¶
const ( // Condition type constants ConditionTypeDependenciesReachable = "DependenciesReachable" ConditionTypeAuthValid = "AuthValid" ConditionTypeConfigValid = "ConfigValid" ConditionTypeReady = "Ready" // Component condition suffix (e.g., "ModelReady", "TemplateReady") ComponentConditionSuffix = "Ready" // DependenciesReachable condition reasons ReasonDependenciesReachable = "Reachable" ReasonDependenciesNotReachable = "InfrastructureError" MessageDependenciesReachable = "All dependencies are reachable" MessageDependenciesNotReachable = "Cannot reach dependencies" // AuthValid condition reasons ReasonAuthError = "AuthError" ReasonAuthValid = "AuthenticationValid" MessageAuthError = "Authentication or authorization failure" MessageAuthValid = "Authentication and authorization successful" // ConfigValid condition reasons ReasonInvalidSpec = "InvalidSpec" ReasonMissingRef = "ReferenceNotFound" ReasonConfigValid = "ConfigurationValid" MessageInvalidSpec = "Configuration validation failed" MessageMissingRef = "Referenced resource not found" MessageConfigValid = "Configuration is valid" // Ready condition reasons ReasonAllComponentsReady = "AllComponentsReady" ReasonComponentsNotReady = "ComponentsNotReady" ReasonProgressing = "Progressing" MessageAllComponentsReady = "All components are ready" MessageComponentsNotReady = "Some components are not ready" MessageProgressing = "Waiting for components to become ready" MessageInfraError = "Infrastructure error - waiting for retry" )
const (
DefaultRuntimeConfigName = "default"
)
Variables ¶
This section is empty.
Functions ¶
func ApplyControllerLabelsToResult ¶
func ApplyControllerLabelsToResult(planResult *PlanResult, labels map[string]string)
ApplyControllerLabelsToResult adds controller-specific labels to all resources in the PlanResult. These labels are added to both the resource metadata and to Job pod templates (if applicable). Labels are merged with existing labels - existing labels are not overwritten.
func ApplyDesiredState ¶
func ApplyDesiredState( ctx context.Context, k8sClient client.Client, fieldOwner string, scheme *runtime.Scheme, desired []client.Object, owner client.Object, ) error
ApplyDesiredState applies the desired set of objects via Server-Side Apply (SSA). Objects are applied in deterministic order: by GVK, then namespace, then name. If owner is provided, owner references will be set on all objects before applying.
func DeriveStateFromErrors ¶
DeriveStateFromErrors infers an AIMStatus from a list of raw errors. This is used when ComponentHealth.State is empty (""). Errors are categorized on-the-fly to determine the appropriate state.
Derivation rules:
- No errors → Ready
- User-fixable errors (InvalidSpec, MissingReference, Auth, Infrastructure) → Degraded
- MissingDependency errors → Progressing (waiting for internal deps)
- Multiple categories → "worst" status wins (Degraded > Progressing > Ready)
Note: Failed is reserved for truly terminal states (e.g., all children permanently failed). User-fixable errors use Degraded because the resource can recover once the user fixes the issue.
func EmitConditionLogs ¶
func EmitConditionLogs( ctx context.Context, transitions []ConditionTransition, manager *ConditionManager, )
EmitConditionLogs logs condition transitions based on their observability config
func EmitConditionTransitions ¶
func EmitConditionTransitions( recorder record.EventRecorder, obj runtime.Object, transitions []ConditionTransition, manager *ConditionManager, )
func EmitRecurringEvents ¶
func EmitRecurringEvents( recorder record.EventRecorder, obj runtime.Object, manager *ConditionManager, )
EmitRecurringEvents emits events for all conditions configured with EventAlways, regardless of whether they transitioned
func EmitRecurringLogs ¶
func EmitRecurringLogs( ctx context.Context, manager *ConditionManager, )
EmitRecurringLogs logs all conditions configured with LogAlways, regardless of whether they transitioned
func IsReconciliationPaused ¶
IsReconciliationPaused returns true if the resource has the reconciliation-paused annotation set to "true". When paused, the controller skips all reconciliation logic and returns immediately.
func IsStateEngineError ¶
IsStateEngineError returns true if the error is a StateEngineError.
func MergeRuntimeConfigs ¶
func MergeRuntimeConfigs(priority, base *aimv1alpha1.AIMRuntimeConfigCommon) *aimv1alpha1.AIMRuntimeConfigCommon
MergeRuntimeConfigs merges two AIMRuntimeConfigCommon structs, with the priority config taking precedence over the base config. Uses key-based merging for env vars.
Parameters:
- priority: The config with higher priority (overrides base values)
- base: The config with lower priority (provides defaults)
func PropagateLabels ¶
func PropagateLabels(parent, child client.Object, config *aimv1alpha1.AIMRuntimeConfigCommon)
PropagateLabels propagates labels from a parent resource to a child resource. AIM system labels (aim.eai.amd.com/*) are always propagated to maintain traceability across the resource hierarchy. User-defined labels are propagated based on the runtime config's label propagation settings.
Parameters:
- parent: The source resource whose labels should be propagated
- child: The target resource that will receive the propagated labels
- config: The runtime config common spec containing label propagation settings
The function does nothing if the parent has no labels. The child's existing labels are preserved and only new labels are added.
Special handling for Jobs: Labels are also propagated to the PodTemplateSpec.
func PropagateLabelsForResult ¶
func PropagateLabelsForResult(parent client.Object, planResult *PlanResult, config *aimv1alpha1.AIMRuntimeConfigCommon)
Types ¶
type ComponentHealth ¶
type ComponentHealth struct {
// Component is the logical name of this component:
// - Physical dependencies: "Model", "Cache", "Template"
// - Child resources: "Workload"
// - Virtual components: "MetadataQuery", "ImageRegistry", "ExternalAPI"
Component string
// State is the current state of this component (optional).
// If empty (""), the state will be derived from Errors using DeriveStateFromErrors.
// Set explicitly when semantic meaning matters (e.g., Progressing vs Failed for missing deps).
State constants.AIMStatus
// Reason is a machine-readable reason code (optional).
// If empty and Errors is non-empty, will be derived from the first categorized error.
// Examples: "NotFound", "ImagePullBackOff", "InvalidCredentials"
Reason string
// Message is a human-readable description (optional).
// If empty and Errors is non-empty, will be derived from the first categorized error.
Message string
// Errors are the raw errors that caused this state.
// These will be categorized by the state engine to drive parent-level conditions
// (ConfigValid, AuthValid, DependenciesReachable).
Errors []error
// DependencyType indicates whether this is an upstream or downstream dependency.
// This is used to determine whether a not-ready component should result in Pending (upstream)
// or Progressing (downstream) status.
DependencyType DependencyType
// ChildRef optionally identifies a specific child resource for fine-grained tracking.
// When set, this ComponentHealth represents a specific pod/deployment/etc.
// When nil, this represents an aggregated component view.
ChildRef *ChildRef
}
ComponentHealth describes the health of a component (dependency, child resource, or virtual component). It unifies tracking for logical components (model, template, cache), physical resources (pods, deployments), and virtual components (external API queries, image registry access, etc.).
func GetHTTPRouteHealth ¶
func GetHTTPRouteHealth(route *gatewayapiv1.HTTPRoute) ComponentHealth
GetHTTPRouteHealth evaluates the health of an HTTPRoute. An HTTPRoute is ready when at least one parent has accepted it.
func GetJobHealth ¶
func GetJobHealth(job *batchv1.Job) ComponentHealth
func GetPodsHealth ¶
func GetPodsHealth(ctx context.Context, clientset kubernetes.Interface, podList *corev1.PodList) ComponentHealth
func GetPvcHealth ¶
func GetPvcHealth(pvc *corev1.PersistentVolumeClaim) ComponentHealth
func (ComponentHealth) GetMessage ¶
func (ch ComponentHealth) GetMessage() string
GetMessage returns the component's message, deriving it from the first error if not explicitly set. Note: Error-derived reason/message will be implemented by the state engine after categorization.
func (ComponentHealth) GetReason ¶
func (ch ComponentHealth) GetReason() string
GetReason returns the component's reason, deriving it from the first error if not explicitly set. Note: Error-derived reason/message will be implemented by the state engine after categorization.
func (ComponentHealth) GetState ¶
func (ch ComponentHealth) GetState() constants.AIMStatus
GetState returns the component's state, deriving it from errors if not explicitly set.
type ComponentHealthProvider ¶
type ComponentHealthProvider interface {
GetComponentHealth() []ComponentHealth
}
ComponentHealthProvider is implemented by observation types that surface per-component health.
type ConditionManager ¶
type ConditionManager struct {
// contains filtered or unexported fields
}
ConditionManager wraps a slice of metav1.Condition and provides helpers.
func NewConditionManager ¶
func NewConditionManager(existing []metav1.Condition) *ConditionManager
func (*ConditionManager) AllConditionsTrue ¶
func (m *ConditionManager) AllConditionsTrue(conditionTypes ...string) bool
AllConditionsTrue checks if all the given condition types are true
func (*ConditionManager) AnyConditionFalse ¶
func (m *ConditionManager) AnyConditionFalse(conditionTypes ...string) bool
AnyConditionFalse checks if any of the given condition types are false
func (*ConditionManager) AnyConditionTrue ¶
func (m *ConditionManager) AnyConditionTrue(conditionTypes ...string) bool
AnyConditionTrue checks if any of the given condition types are true
func (*ConditionManager) Conditions ¶
func (m *ConditionManager) Conditions() []metav1.Condition
func (*ConditionManager) ConfigFor ¶
func (m *ConditionManager) ConfigFor(condType string) ObservabilityConfig
func (*ConditionManager) Delete ¶
func (m *ConditionManager) Delete(condType string)
Delete removes a condition by type if it exists. This is useful when a condition becomes irrelevant (e.g., cache condition when caching is disabled).
func (*ConditionManager) MarkFalse ¶
func (m *ConditionManager) MarkFalse(condType, reason, message string, opts ...ObservabilityOption)
func (*ConditionManager) MarkTrue ¶
func (m *ConditionManager) MarkTrue(condType, reason, message string, opts ...ObservabilityOption)
func (*ConditionManager) MarkUnknown ¶
func (m *ConditionManager) MarkUnknown(condType, reason, message string, opts ...ObservabilityOption)
func (*ConditionManager) Set ¶
func (m *ConditionManager) Set(conditionType string, status metav1.ConditionStatus, reason, message string, opts ...ObservabilityOption)
func (*ConditionManager) SetCondition ¶
func (m *ConditionManager) SetCondition(cond metav1.Condition, opts ...ObservabilityOption)
SetCondition sets or updates a condition by type.
type ConditionTransition ¶
type ConditionTransition struct {
Old *metav1.Condition // nil if this condition is new
New *metav1.Condition // nil if this condition was removed
}
func DiffConditionTransitions ¶
func DiffConditionTransitions(oldConditions, newConditions []metav1.Condition) []ConditionTransition
DiffConditionTransitions returns transitions between old and new condition sets. It compares by Type, and considers a transition interesting if Status or Reason changed.
type ConfiguredCondition ¶
type ConfiguredCondition struct {
metav1.Condition
Config ObservabilityConfig
}
ConfiguredCondition extends the metav1.Condition with observability configuration that determines how this condition or a change in its state should be reported.
type DependencyType ¶
type DependencyType string
DependencyType indicates whether a component is an upstream or downstream dependency.
const ( // DependencyTypeUpstream indicates this component is an upstream dependency that this controller depends on. // Examples: templates, runtime configs, secrets, configmaps. // When upstream dependencies are not ready, the resource should be Pending. DependencyTypeUpstream DependencyType = "Upstream" // DependencyTypeDownstream indicates this component is a downstream resource that this controller creates. // Examples: artifacts, pods, jobs, child resources. // When downstream dependencies are not ready (being created), the resource should be Progressing. DependencyTypeDownstream DependencyType = "Downstream" // DependencyTypeUnspecified means the dependency type is not specified (for backward compatibility). DependencyTypeUnspecified DependencyType = "" )
type DomainReconciler ¶
type DomainReconciler[T ObjectWithStatus[S], S StatusWithConditions, F any, Obs any] interface { // FetchRemoteState hits the API via client and returns the fetched objects. // Errors are captured in FetchResult types, not returned - this ensures ComposeState always runs. FetchRemoteState(ctx context.Context, c client.Client, reconcileCtx ReconcileContext[T]) F // ComposeState interprets the fetched objects into a meaningful observation. // All errors should be reflected in ComponentHealth via the observation, not returned. ComposeState(ctx context.Context, reconcileCtx ReconcileContext[T], fetched F) Obs // PlanResources must be pure: no client calls, just derive desired state changes based on the object + observed state. // Returns PlanResult with objects to apply/delete. Errors during planning should be rare (programming errors). PlanResources(ctx context.Context, reconcileCtx ReconcileContext[T], obs Obs) PlanResult }
DomainReconciler is implemented by domain-specific logic for a CRD.
type ErrorCategory ¶
type ErrorCategory int
ErrorCategory classifies high-level error semantics for the state engine.
const ( ErrorCategoryUnknown ErrorCategory = iota ErrorCategoryInfrastructure ErrorCategoryAuth ErrorCategoryMissingDownstreamDependency ErrorCategoryMissingUpstreamDependency ErrorCategoryInvalidSpec ErrorCategoryResourceExhaustion )
func (ErrorCategory) String ¶
func (c ErrorCategory) String() string
String returns the human-readable name of the error category.
type EventLevel ¶
type EventLevel string
const ( LevelNone EventLevel = "" LevelNormal EventLevel = EventLevel(corev1.EventTypeNormal) LevelWarning EventLevel = EventLevel(corev1.EventTypeWarning) )
type FetchResult ¶
FetchResult wraps a fetched value and its error, simplifying fetch result handling. Use the Fetch and FetchList helpers to create FetchResults.
func Fetch ¶
func Fetch[T client.Object](ctx context.Context, c client.Client, key client.ObjectKey, obj T) FetchResult[T]
Fetch retrieves a single object from the Kubernetes API and wraps the result. This helper reduces boilerplate when building fetch structs.
Example:
type MyFetch struct {
Model FetchResult[*aimv1.AIMModel]
Template FetchResult[*aimv1.AIMServiceTemplate]
}
func FetchRemoteState(ctx, client, obj) MyFetch {
return MyFetch{
Model: Fetch(ctx, client, modelKey, &aimv1.AIMModel{}),
Template: Fetch(ctx, client, templateKey, &aimv1.AIMServiceTemplate{}),
}
}
func FetchList ¶
func FetchList[T client.ObjectList](ctx context.Context, c client.Client, list T, opts ...client.ListOption) FetchResult[T]
FetchList retrieves a list of objects from the Kubernetes API and wraps the result. This helper reduces boilerplate when building fetch structs for list operations.
Example:
type MyFetch struct {
Pods FetchResult[*corev1.PodList]
}
func FetchRemoteState(ctx, client, obj) MyFetch {
return MyFetch{
Pods: FetchList(ctx, client, &corev1.PodList{}, client.InNamespace(ns)),
}
}
// Access in ComposeState:
for _, pod := range fetch.Pods.Value.Items { ... }
func FetchMergedRuntimeConfig ¶
func FetchMergedRuntimeConfig(ctx context.Context, c client.Client, name, namespace string) FetchResult[*aimv1alpha1.AIMRuntimeConfigCommon]
FetchMergedRuntimeConfig fetches and merges namespace and cluster-scoped runtime configs. Returns a FetchResult containing the merged config.
Behavior:
- If both namespace and cluster configs exist, they are merged (namespace takes precedence)
- If only one exists, it is returned
- If neither exists and name is "default", returns nil config with no error (OK)
- If neither exists and name is not "default", returns NotFound error
func (FetchResult[T]) HasError ¶
func (fr FetchResult[T]) HasError() bool
HasError returns true if there was an error.
func (FetchResult[T]) IsNotFound ¶
func (fr FetchResult[T]) IsNotFound() bool
IsNotFound returns true if the error is a NotFound error.
func (FetchResult[T]) OK ¶
func (fr FetchResult[T]) OK() bool
OK returns true if there was no error.
func (FetchResult[T]) ToComponentHealth ¶
func (fr FetchResult[T]) ToComponentHealth(component string, inspector func(T) ComponentHealth) ComponentHealth
ToComponentHealth converts a FetchResult into ComponentHealth with automatic error handling. Fetch errors are passed through as raw errors (categorized later by the state engine). If the fetch succeeded, the inspector function determines the semantic state.
func (FetchResult[T]) ToComponentHealthWithContext ¶
func (fr FetchResult[T]) ToComponentHealthWithContext( ctx context.Context, clientset kubernetes.Interface, component string, inspector func(context.Context, kubernetes.Interface, T) ComponentHealth, ) ComponentHealth
ToComponentHealthWithContext converts a FetchResult into ComponentHealth with automatic error handling. This variant provides context and Kubernetes clientset to the inspector function. Use this for health inspectors that need to fetch additional information (like pod logs).
func (FetchResult[T]) ToDownstreamComponentHealth ¶
func (fr FetchResult[T]) ToDownstreamComponentHealth(component string, inspector func(T) ComponentHealth) ComponentHealth
ToDownstreamComponentHealth converts a FetchResult for a downstream dependency into ComponentHealth. Downstream dependencies are resources that this controller creates (pods, jobs, child resources, etc.). NotFound errors for downstream dependencies are categorized as MissingDownstreamDependency (retriable/expected).
func (FetchResult[T]) ToUpstreamComponentHealth ¶
func (fr FetchResult[T]) ToUpstreamComponentHealth(component string, inspector func(T) ComponentHealth) ComponentHealth
ToUpstreamComponentHealth converts a FetchResult for an upstream dependency into ComponentHealth. Upstream dependencies are resources that this controller depends on (templates, configs, secrets, etc.). NotFound errors for upstream dependencies are categorized as MissingUpstreamDependency (non-retriable, but resolved once the missing resource is created by an external action).
type InfrastructureError ¶
type InfrastructureError struct {
// Count is the number of infrastructure errors encountered
Count int
// Errors contains the detailed error information
Errors []error
}
InfrastructureError represents retriable infrastructure failures (network, API server, etc.). It provides a stable error type for controller-runtime's exponential backoff, while preserving detailed error information for logging and debugging.
func (InfrastructureError) Error ¶
func (e InfrastructureError) Error() string
func (InfrastructureError) Unwrap ¶
func (e InfrastructureError) Unwrap() []error
type ManualStatusController ¶
type ManualStatusController[T ObjectWithStatus[S], S StatusWithConditions, Obs any] interface { // SetStatus mutates obj.Status via the ConditionManager based on observations. // Component health and errors are available via obs (if it implements ComponentHealthProvider). SetStatus(status S, cm *ConditionManager, obs Obs) }
ManualStatusController takes full ownership of status & conditions. When implemented, the StateEngine is NOT called.
type ObjectWithStatus ¶
type ObjectWithStatus[S StatusWithConditions] interface { runtime.Object client.Object GetStatus() S }
ObjectWithStatus is a constraint for objects that have a Status field with conditions.
type ObservabilityConfig ¶
type ObservabilityConfig struct {
// contains filtered or unexported fields
}
ObservabilityConfig controls how condition changes are observed (events and logs). This configuration determines when and how to emit Kubernetes events and controller logs for condition state changes.
type ObservabilityOption ¶
type ObservabilityOption func(*ObservabilityConfig)
func AsError ¶
func AsError() ObservabilityOption
AsError is for critical/persistent errors. Emits error log (V(0)) and warning event EVERY reconcile (recurring).
func AsInfo ¶
func AsInfo() ObservabilityOption
AsInfo is the default for informational/progress updates. Emits info log (V(1)) and normal event on transition only.
func AsWarning ¶
func AsWarning() ObservabilityOption
AsWarning is for transient errors or degraded states. Emits error log (V(0)) and warning event on transition only.
func EventLevelToOption ¶
func EventLevelToOption(level EventLevel) ObservabilityOption
EventLevelToOption converts the old EventLevel constants to ObservabilityOptions. This provides backwards compatibility for existing code.
func Silent ¶
func Silent() ObservabilityOption
Silent explicitly marks a condition as having no events or logs. Use this to suppress observability for low-priority conditions.
func WithCriticalError ¶
func WithCriticalError() ObservabilityOption
func WithDebugLog ¶
func WithDebugLog() ObservabilityOption
WithDebugLog logs at V(1) - only visible with -zap-log-level=debug. Use for verbose operational details.
func WithErrorLog ¶
func WithErrorLog() ObservabilityOption
WithErrorLog logs at V(0) - always visible. Use for errors that must be seen.
func WithEventMessage ¶
func WithEventMessage(msg string) ObservabilityOption
func WithEventReason ¶
func WithEventReason(reason string) ObservabilityOption
func WithInfoLog ¶
func WithInfoLog() ObservabilityOption
WithInfoLog logs at V(0) - visible at default info level. Use for important state transitions like Ready conditions.
func WithLog ¶
func WithLog(level int) ObservabilityOption
func WithLogMessage ¶
func WithLogMessage(msg string) ObservabilityOption
func WithNormalEvent ¶
func WithNormalEvent() ObservabilityOption
func WithRecurring ¶
func WithRecurring() ObservabilityOption
WithRecurring makes any event/log happen every reconcile (not just on transition)
func WithRecurringErrorLog ¶
func WithRecurringErrorLog() ObservabilityOption
func WithRecurringWarningEvent ¶
func WithRecurringWarningEvent() ObservabilityOption
func WithWarningEvent ¶
func WithWarningEvent() ObservabilityOption
type Pipeline ¶
type Pipeline[T ObjectWithStatus[S], S StatusWithConditions, F any, Obs any] struct { Client client.Client StatusClient client.StatusWriter // usually mgr.GetClient().Status() Recorder record.EventRecorder Reconciler DomainReconciler[T, S, F, Obs] Scheme *runtime.Scheme ControllerName string Clientset kubernetes.Interface // Optional: for health inspectors that need additional K8s API access }
Pipeline wires a domain reconciler with controller-runtime utilities.
func (*Pipeline[T, S, F, Obs]) GetFullName ¶
GetFullName returns the full AIM controller identifier (used for app.kubernetes.io/managed-by label). Example: "model" -> "aim-model-controller"
func (*Pipeline[T, S, F, Obs]) GetKubernetesName ¶
GetKubernetesName returns the Kubernetes controller name (used in SetupWithManager's .Named()). Example: "model" -> "model-controller"
type PlanResult ¶
type PlanResult struct {
// RequeueAfter signals to the controller that reconciliation should be retried
// after the specified duration. Use this when the reconciler cannot proceed
// (e.g., blocked by a rate limit) but should retry later.
RequeueAfter time.Duration
// contains filtered or unexported fields
}
PlanResult contains the desired state changes from the PlanResources phase.
func (*PlanResult) Apply ¶
func (pr *PlanResult) Apply(obj client.Object)
Apply adds an object to be applied with an owner reference (default behavior). The object will be garbage collected when the owner is deleted.
func (*PlanResult) ApplyWithoutOwnerRef ¶
func (pr *PlanResult) ApplyWithoutOwnerRef(obj client.Object)
ApplyWithoutOwnerRef adds an object to be applied without an owner reference. Use this for shared resources or resources that should outlive the owner.
func (*PlanResult) Delete ¶
func (pr *PlanResult) Delete(obj client.Object)
Delete adds an object to be deleted
func (*PlanResult) GetToApply ¶
func (pr *PlanResult) GetToApply() []client.Object
GetToApply returns the objects to be applied with owner references (for testing)
func (*PlanResult) GetToApplyWithoutOwnerRef ¶
func (pr *PlanResult) GetToApplyWithoutOwnerRef() []client.Object
GetToApplyWithoutOwnerRef returns the objects to be applied without owner references (for testing)
func (*PlanResult) GetToDelete ¶
func (pr *PlanResult) GetToDelete() []client.Object
GetToDelete returns the objects to be deleted (for testing)
type ReconcileContext ¶
type ReconcileContext[T client.Object] struct { Object T MergedRuntimeConfig FetchResult[*aimv1alpha1.AIMRuntimeConfigCommon] }
type RuntimeConfigRefProvider ¶
type RuntimeConfigRefProvider interface {
GetRuntimeConfigRef() aimv1alpha1.RuntimeConfigRef
}
type StateEngineDecision ¶
type StateEngineDecision struct {
// ShouldApply is false if ConfigValid/AuthValid/DependenciesReachable is False
ShouldApply bool
// ShouldRequeue is true if infrastructure errors are present (triggers exponential backoff)
ShouldRequeue bool
// RequeueError is the error to return for controller-runtime requeue
RequeueError error
}
StateEngineDecision contains the state engine's analysis and reconciliation directives.
type StateEngineError ¶
type StateEngineError interface {
error
Category() ErrorCategory
Reason() string
UserMessage() string
}
StateEngineError is a structured error for the state engine layer.
func CategorizeError ¶
func CategorizeError(err error) StateEngineError
CategorizeError inspects a raw error and categorizes it as a StateEngineError. This function performs deep inspection to determine the error category:
- Kubernetes API errors (NotFound, Forbidden, Unauthorized, etc.)
- Network errors (connection refused, timeout, DNS failures)
- HTTP status codes (401, 403, 5xx)
This is the SINGLE place where error categorization happens.
func NewAuthError ¶
func NewAuthError(reason, message string, cause error) StateEngineError
NewAuthError creates an error for authentication or authorization failures (e.g., missing credentials, insufficient permissions). These errors should be surfaced in status conditions as they indicate configuration issues.
Parameters:
- reason: Machine-readable reason code (e.g., "InsufficientPermissions")
- message: Human-readable description for users
- cause: Underlying error that caused this issue (may be nil)
func NewInfrastructureError ¶
func NewInfrastructureError(reason, message string, cause error) StateEngineError
NewInfrastructureError creates an error for transient infrastructure issues (e.g., network failures, API server unavailability). These errors should typically cause reconciliation retry without affecting status conditions.
Parameters:
- reason: Machine-readable reason code (e.g., "NetworkFailure")
- message: Human-readable description for users
- cause: Underlying error that caused this issue (may be nil)
func NewInvalidSpecError ¶
func NewInvalidSpecError(reason, message string, cause error) StateEngineError
NewInvalidSpecError creates an error for invalid user-provided specifications (e.g., validation failures, malformed configuration). These errors should be surfaced in status conditions as they require user intervention to fix.
Parameters:
- reason: Machine-readable reason code (e.g., "InvalidConfiguration")
- message: Human-readable description for users
- cause: Underlying error that caused this issue (may be nil)
func NewMissingDownstreamDependencyError ¶
func NewMissingDownstreamDependencyError(reason, message string, cause error) StateEngineError
NewMissingDownstreamDependencyError creates an error for missing internal dependencies that the controller is waiting for (e.g., a template being created, a pod starting). These are transient states that should self-heal - the controller will keep progressing.
Parameters:
- reason: Machine-readable reason code (e.g., "TemplateNotReady")
- message: Human-readable description for users
- cause: Underlying error that caused this issue (may be nil)
func NewMissingUpstreamDependencyError ¶
func NewMissingUpstreamDependencyError(reason, message string, cause error) StateEngineError
NewMissingUpstreamDependencyError creates an error for user-referenced resources that don't exist (e.g., a runtimeConfigName that points to a non-existent config, a secret reference). These are configuration errors that require user intervention - the spec is valid but the referenced resource is missing. Sets ConfigValid=False.
Parameters:
- reason: Machine-readable reason code (e.g., "ConfigNotFound")
- message: Human-readable description for users
- cause: Underlying error that caused this issue (may be nil)
func NewResourceExhaustionError ¶
func NewResourceExhaustionError(reason, message string, cause error) StateEngineError
NewResourceExhaustionError creates a resource exhaustion error. Use this for errors related to resource limits being hit:
- Disk full / no space left on device
- Out of memory (OOM)
- Storage quota exceeded
These errors require user intervention (increase PVC size, memory limits, etc.) and will not auto-recover through retries.
type StatusDecorator ¶
type StatusDecorator[T ObjectWithStatus[S], S StatusWithConditions, Obs any] interface {
// DecorateStatus can set domain-specific status fields and optional conditions. It can be used to extend
// and override the status and conditions that are set by the StateEngine.
DecorateStatus(status S, cm *ConditionManager, obs Obs)
}
StatusDecorator lets a reconciler *extend* status, but not replace it.
type StatusHelper ¶
type StatusHelper struct {
// contains filtered or unexported fields
}
StatusHelper assists with setting repetitive broad status categories
func NewStatusHelper ¶
func NewStatusHelper(status StatusWithConditions, cm *ConditionManager) *StatusHelper
func (*StatusHelper) Degraded ¶
func (h *StatusHelper) Degraded(reason, msg string)
func (*StatusHelper) Failed ¶
func (h *StatusHelper) Failed(reason, msg string)
func (*StatusHelper) NotAvailable ¶
func (h *StatusHelper) NotAvailable(reason, msg string)
func (*StatusHelper) Progressing ¶
func (h *StatusHelper) Progressing(reason, msg string)
func (*StatusHelper) Ready ¶
func (h *StatusHelper) Ready(reason, msg string)