Documentation
¶
Index ¶
- Constants
- Variables
- func AddOrOverrideTFClientMissingAnnotationsBeforePatch(pod *v1.Pod, tfInfo TensorFusionInfo)
- func AddTFDefaultClientConfBeforePatch(ctx context.Context, pod *v1.Pod, pool *tfv1.GPUPool, tfInfo TensorFusionInfo, ...)
- func AddTFHypervisorConfAfterTemplate(ctx context.Context, spec *v1.PodSpec, pool *tfv1.GPUPool)
- func AddTFNodeDiscoveryConfAfterTemplate(ctx context.Context, tmpl *v1.PodTemplateSpec, pool *tfv1.GPUPool, ...)
- func AddWorkerConfAfterTemplate(ctx context.Context, spec *v1.PodSpec, workerConfig *tfv1.WorkerConfig, ...) string
- func AppendTFWorkerLabelsAndAnnotationsAfterTemplate(podTmpl *v1.PodTemplate, workload *tfv1.TensorFusionWorkload, ...) (map[string]string, map[string]string)
- func CalculateExponentialBackoffWithJitter(retryCount int64) time.Duration
- func CompareAndGetObjectHash(hash string, obj ...any) (bool, string)
- func CurrentIP() string
- func CurrentNamespace() string
- func EqualConditionsDisregardTransitionTime(a, b []metav1.Condition) bool
- func EscapeJSONPointer(s string) string
- func ExtractPoolNameFromNodeLabel(node *tfv1.GPUNode) string
- func FindFirstLevelOwnerReference(obj metav1.Object) *metav1.OwnerReference
- func FindRootControllerRef(ctx context.Context, c client.Client, obj metav1.Object) (*metav1.OwnerReference, error)
- func FindRootOwnerReference(ctx context.Context, c client.Client, namespace string, obj metav1.Object) (*metav1.OwnerReference, error)
- func GPUResourcesFromAnnotations(annotations map[string]string) (*tfv1.Resources, error)
- func GPUResourcesToAnnotations(resources *tfv1.Resources) map[string]string
- func GetEnvOrDefault(key, defaultValue string) string
- func GetGPUResource(pod *corev1.Pod, isRequest bool) (tfv1.Resource, error)
- func GetInitialGPUNodeSelector() []string
- func GetObjectHash(objs ...any) string
- func GetPodControllerRef(ctx context.Context, c client.Client, pod *corev1.Pod) (*metav1.OwnerReference, error)
- func GetSelfServiceAccountNameFull() string
- func GetSelfServiceAccountNameShort() string
- func HandleFinalizer[T client.Object](ctx context.Context, obj T, r client.Client, ...) (shouldReturn bool, err error)
- func HasGPUResourceRequest(pod *corev1.Pod) bool
- func InitServiceAccountConfig()
- func IsLicensed() bool
- func IsPodConditionTrue(conditions []corev1.PodCondition, conditionType corev1.PodConditionType) bool
- func IsPodStopped(pod *corev1.Pod) bool
- func IsProgressiveMigration() bool
- func IsTensorFusionPod(pod *corev1.Pod) bool
- func IsTensorFusionWorker(pod *corev1.Pod) bool
- func LoadConfigFromFile[T any](filename string, target *T) error
- func NewShortID(length int) string
- func ReadServiceAccountToken() string
- func SetProgressiveMigration(isProgressiveMigration bool)
- func SetWorkerContainerSpec(container *v1.Container, workerConfig *tfv1.WorkerConfig, ...)
- func WatchConfigFileChanges(ctx context.Context, filename string) (<-chan []byte, error)
- type TensorFusionInfo
Constants ¶
const ( WatchConfigFileChangesInterval = 15 * time.Second ServiceAccountTokenPath = "/var/run/secrets/kubernetes.io/serviceaccount/token" )
Variables ¶
var ErrNextLoop = errors.New("stop this loop and return the associated Result object")
ErrNextLoop is not a real error. It forces the current reconciliation loop to stop and return the associated Result object.
var ErrTerminateLoop = errors.New("stop this loop and do not requeue")
ErrTerminateLoop is not a real error. It forces the current reconciliation loop to stop without requeueing.
var GPUResourceNames = []corev1.ResourceName{
"nvidia.com/gpu",
"amd.com/gpu",
}
var IsTestMode = false
Functions ¶
func AddOrOverrideTFClientMissingAnnotationsBeforePatch ¶ added in v1.37.0
func AddOrOverrideTFClientMissingAnnotationsBeforePatch(pod *v1.Pod, tfInfo TensorFusionInfo)
func AddTFDefaultClientConfBeforePatch ¶ added in v1.37.0
func AddTFHypervisorConfAfterTemplate ¶ added in v1.37.0
func AddTFNodeDiscoveryConfAfterTemplate ¶ added in v1.37.0
func AddWorkerConfAfterTemplate ¶ added in v1.37.0
func AddWorkerConfAfterTemplate( ctx context.Context, spec *v1.PodSpec, workerConfig *tfv1.WorkerConfig, hypervisorConfig *tfv1.HypervisorConfig, workload *tfv1.TensorFusionWorkload, ) string
func AppendTFWorkerLabelsAndAnnotationsAfterTemplate ¶ added in v1.37.0
func AppendTFWorkerLabelsAndAnnotationsAfterTemplate( podTmpl *v1.PodTemplate, workload *tfv1.TensorFusionWorkload, containerName string, ) (map[string]string, map[string]string)
func CompareAndGetObjectHash ¶ added in v1.28.0
func CurrentNamespace ¶
func CurrentNamespace() string
func EqualConditionsDisregardTransitionTime ¶ added in v1.35.0
func EscapeJSONPointer ¶ added in v1.26.3
EscapeJSONPointer escapes a string according to the JSON Pointer spec (RFC 6901). It escapes '~' as '~0' and '/' as '~1'.
func ExtractPoolNameFromNodeLabel ¶ added in v1.33.1
func FindFirstLevelOwnerReference ¶ added in v1.35.0
func FindFirstLevelOwnerReference(obj metav1.Object) *metav1.OwnerReference
FindFirstLevelOwnerReference finds the first-level (direct) owner reference for a given object (e.g. Pod).
func FindRootControllerRef ¶ added in v1.47.2
func FindRootControllerRef(ctx context.Context, c client.Client, obj metav1.Object) (*metav1.OwnerReference, error)
FindRootControllerRef recursively finds the root controller reference for a given object (e.g. Pod).
func FindRootOwnerReference ¶ added in v1.26.9
func FindRootOwnerReference(ctx context.Context, c client.Client, namespace string, obj metav1.Object) (*metav1.OwnerReference, error)
FindRootOwnerReference recursively finds the root owner reference for a given object (e.g. Pod).
func GPUResourcesFromAnnotations ¶ added in v1.46.3
func GPUResourcesToAnnotations ¶ added in v1.46.3
func GetEnvOrDefault ¶ added in v1.34.0
func GetGPUResource ¶ added in v1.35.0
func GetInitialGPUNodeSelector ¶ added in v1.44.0
func GetInitialGPUNodeSelector() []string
func GetObjectHash ¶
GetObjectHash generates a short FNV-1a hash for one or more objects.
func GetPodControllerRef ¶ added in v1.47.2
func GetPodControllerRef(ctx context.Context, c client.Client, pod *corev1.Pod) (*metav1.OwnerReference, error)
GetPodControllerRef returns the controller reference for a Pod. For Pods that are indirectly controlled (e.g., by a Deployment or CronJob), it returns the indirect controller. In all other cases, it returns the direct controller reference of the Pod. If the Pod has no controller reference, it returns nil.
func GetSelfServiceAccountNameFull ¶ added in v1.37.0
func GetSelfServiceAccountNameFull() string
func GetSelfServiceAccountNameShort ¶ added in v1.37.0
func GetSelfServiceAccountNameShort() string
func HandleFinalizer ¶
func HandleFinalizer[T client.Object]( ctx context.Context, obj T, r client.Client, deleteHook func(context.Context, T) (bool, error), ) (shouldReturn bool, err error)
HandleFinalizer ensures proper finalizer management for Kubernetes resources. It automatically adds the finalizer when needed, and removes it after successful cleanup. Returns (shouldReturn, err):
- shouldReturn: true if the caller should immediately return and wait for the next reconcile.
- err: any error encountered during update or deleteHook.
func HasGPUResourceRequest ¶ added in v1.39.0
func InitServiceAccountConfig ¶ added in v1.36.1
func InitServiceAccountConfig()
func IsLicensed ¶ added in v1.47.2
func IsLicensed() bool
func IsPodConditionTrue ¶
func IsPodConditionTrue(conditions []corev1.PodCondition, conditionType corev1.PodConditionType) bool
func IsPodStopped ¶ added in v1.37.0
func IsProgressiveMigration ¶ added in v1.39.0
func IsProgressiveMigration() bool
func IsTensorFusionPod ¶ added in v1.39.0
func IsTensorFusionWorker ¶ added in v1.39.1
func LoadConfigFromFile ¶ added in v1.34.0
func NewShortID ¶ added in v1.35.0
func ReadServiceAccountToken ¶ added in v1.36.1
func ReadServiceAccountToken() string
func SetProgressiveMigration ¶ added in v1.39.0
func SetProgressiveMigration(isProgressiveMigration bool)
SetProgressiveMigration is intended for testing purposes only.
func SetWorkerContainerSpec ¶ added in v1.47.2
func SetWorkerContainerSpec( container *v1.Container, workerConfig *tfv1.WorkerConfig, hypervisorConfig *tfv1.HypervisorConfig, disabledFeatures string, sharedMemMode bool, )
SetWorkerContainerSpec configures the worker container with required settings
func WatchConfigFileChanges ¶ added in v1.34.0
WatchConfigFileChanges watches a file for changes and sends the file content through a channel when changes are detected. The channel receives the raw file content as []byte whenever the file is modified. The watch interval is WatchConfigFileChangesInterval (15 seconds).
Types ¶
type TensorFusionInfo ¶ added in v1.37.0
type TensorFusionInfo struct {
Profile *tfv1.WorkloadProfileSpec
DynamicReplicas bool
EnabledReplicas *int32
WorkloadName string
PodControllerRef *metav1.OwnerReference
ContainerNames []string
}