Documentation
¶
Index ¶
- Constants
- func AddFinalizer(obj client.Object)
- func AppendUniqueImagePullSecrets(existing, additional []corev1.LocalObjectReference) []corev1.LocalObjectReference
- func ContainsFinalizer(obj client.Object) bool
- func CopySpec(source, destination client.Object) error
- func DetectGroveAvailability(ctx context.Context, mgr ctrl.Manager) bool
- func DetectKaiSchedulerAvailability(ctx context.Context, mgr ctrl.Manager) bool
- func DetectLWSAvailability(ctx context.Context, mgr ctrl.Manager) bool
- func DetectVolcanoAvailability(ctx context.Context, mgr ctrl.Manager) bool
- func EphemeralDeploymentEventFilter(config Config) predicate.Predicate
- func GetResourceHash(obj any) (string, error)
- func GetResourcesConfig(resources *v1alpha1.Resources) (*corev1.ResourceRequirements, error)
- func GetSpecHash(obj client.Object) (string, error)
- func HandleFinalizer[T client.Object](ctx context.Context, obj T, writer client.Writer, finalizer Finalizer[T]) (bool, error)
- func RemoveFinalizer(obj client.Object)
- func ScaleResource(ctx context.Context, scaleClient scale.ScalesGetter, ...) error
- func SortKeys(obj interface{}) interface{}
- func SyncResource[T client.Object](ctx context.Context, r Reconciler, parentResource client.Object, ...) (modified bool, res T, err error)
- type Config
- type ExcludedNamespacesInterface
- type Finalizer
- type GroveConfig
- type IngressConfig
- type KaiSchedulerConfig
- type LWSConfig
- type MpiRunConfig
- type RBACConfig
- type Reconciler
- type Resource
- type ResourceGenerator
- type SpecChangeResult
Constants ¶
const (
	// NvidiaAnnotationHashKey indicates annotation name for last applied hash by the operator
	NvidiaAnnotationHashKey = "nvidia.com/last-applied-hash"
	// NvidiaAnnotationGenerationKey indicates annotation name for last applied generation by the operator
	// This is used to detect manual changes to resources
	NvidiaAnnotationGenerationKey = "nvidia.com/last-applied-generation"
)
Variables ¶
This section is empty.
Functions ¶
func AddFinalizer ¶
func AppendUniqueImagePullSecrets ¶
func AppendUniqueImagePullSecrets(existing, additional []corev1.LocalObjectReference) []corev1.LocalObjectReference
AppendUniqueImagePullSecrets appends secrets to existing, skipping any that already exist by name.
func ContainsFinalizer ¶
func DetectGroveAvailability ¶
DetectGroveAvailability checks if Grove is available by checking if the Grove API group is registered. This approach uses the discovery client, which is simpler and more reliable.
func DetectKaiSchedulerAvailability ¶
DetectKaiSchedulerAvailability checks if Kai-scheduler is available by checking if the scheduling.run.ai API group is registered. This approach uses the discovery client, which is simpler and more reliable.
func DetectLWSAvailability ¶
DetectLWSAvailability checks if LWS is available by checking if the LWS API group is registered. This approach uses the discovery client, which is simpler and more reliable.
func DetectVolcanoAvailability ¶
DetectVolcanoAvailability checks if Volcano is available by checking if the Volcano API group is registered. This approach uses the discovery client, which is simpler and more reliable.
func GetResourceHash ¶
GetResourceHash returns a consistent hash for the given object spec
func GetResourcesConfig ¶
func GetResourcesConfig(resources *v1alpha1.Resources) (*corev1.ResourceRequirements, error)
func HandleFinalizer ¶
func RemoveFinalizer ¶
func ScaleResource ¶
func ScaleResource(ctx context.Context, scaleClient scale.ScalesGetter, gvr schema.GroupVersionResource, namespace, name string, replicas int32) error
ScaleResource scales any Kubernetes resource using the Scale subresource
func SortKeys ¶
func SortKeys(obj interface{}) interface{}
SortKeys recursively sorts the keys of a map to ensure consistent serialization
func SyncResource ¶
func SyncResource[T client.Object](ctx context.Context, r Reconciler, parentResource client.Object, generateResource ResourceGenerator[T]) (modified bool, res T, err error)
Types ¶
type Config ¶
type Config struct {
// Enable resource filtering: only the resources belonging to the given namespace will be handled.
RestrictedNamespace string
Grove GroveConfig
LWS LWSConfig
KaiScheduler KaiSchedulerConfig
EtcdAddress string
NatsAddress string
IngressConfig IngressConfig
// ModelExpressURL is the URL of the Model Express server to inject into all pods
ModelExpressURL string
// PrometheusEndpoint is the URL of the Prometheus endpoint to use for metrics
PrometheusEndpoint string
MpiRun MpiRunConfig
// RBAC configuration for cross-namespace resource management
RBAC RBACConfig
// ExcludedNamespaces is a thread-safe set of namespaces to exclude (cluster-wide mode only)
ExcludedNamespaces ExcludedNamespacesInterface
// DiscoveryBackend is the discovery backend to use. Default is "kubernetes" for Kubernetes API service discovery. Set to "etcd" to use ETCD for discovery.
DiscoveryBackend string
// WebhooksEnabled indicates whether admission webhooks are enabled
// When true, controllers skip validation (webhooks handle it)
// When false, controllers perform validation (defense in depth)
WebhooksEnabled bool
}
func (Config) GetDiscoveryBackend ¶
type ExcludedNamespacesInterface ¶
ExcludedNamespacesInterface defines the interface for checking namespace exclusions
type GroveConfig ¶
type IngressConfig ¶
type IngressConfig struct {
VirtualServiceGateway string
IngressControllerClassName string
IngressControllerTLSSecret string
IngressHostSuffix string
}
func (*IngressConfig) UseVirtualService ¶
func (i *IngressConfig) UseVirtualService() bool
type KaiSchedulerConfig ¶
type KaiSchedulerConfig struct {
// Enabled is automatically determined by checking if Kai-scheduler CRDs are installed in the cluster
Enabled bool
}
type LWSConfig ¶
type LWSConfig struct {
// Enabled is automatically determined by checking if LWS CRDs are installed in the cluster
Enabled bool
}
type MpiRunConfig ¶
type MpiRunConfig struct {
// SecretName is the name of the secret containing the SSH key for MPI Run
SecretName string
}
type RBACConfig ¶
type RBACConfig struct {
// PlannerClusterRoleName is the name of the ClusterRole for planner (cluster-wide mode only)
PlannerClusterRoleName string
// DGDRProfilingClusterRoleName is the name of the ClusterRole for DGDR profiling jobs (cluster-wide mode only)
DGDRProfilingClusterRoleName string
}
RBACConfig holds configuration for RBAC management
type Reconciler ¶
type Reconciler interface {
client.Client
GetRecorder() record.EventRecorder
}
type Resource ¶
type Resource struct {
// contains filtered or unexported fields
}
func NewResource ¶
func (*Resource) GetServiceStatuses ¶
func (r *Resource) GetServiceStatuses() map[string]v1alpha1.ServiceReplicaStatus
type ResourceGenerator ¶
ResourceGenerator is a function that generates a resource. It must return the resource, a boolean indicating if the resource should be deleted, and an error. If the resource should be deleted, the returned resource must contain the necessary information to delete it (name and namespace).
type SpecChangeResult ¶
type SpecChangeResult struct {
// NewHash is the hash to set in the annotation (nil if no update needed)
NewHash *string
// NewGeneration is the generation to set in the annotation
NewGeneration int64
// NeedsUpdate indicates whether the resource needs to be updated
NeedsUpdate bool
// ManualChangeDetected indicates whether a manual change was detected
ManualChangeDetected bool
}
SpecChangeResult contains the result of spec change detection
func GetSpecChangeResult ¶
GetSpecChangeResult determines if a resource needs to be updated by comparing the desired spec hash with the last applied hash annotation. It also tracks generation to detect manual changes.
Returns:
- SpecChangeResult with update information
- error if hash computation fails