Documentation ¶
    
    
  
    
  
    Index ¶
- Constants
 - Variables
 - func AddOrOverrideTFClientMissingAnnotationsBeforePatch(pod *v1.Pod, tfInfo TensorFusionInfo)
 - func AddTFDefaultClientConfBeforePatch(ctx context.Context, pod *v1.Pod, pool *tfv1.GPUPool, tfInfo TensorFusionInfo, ...)
 - func AddTFHypervisorConfAfterTemplate(ctx context.Context, spec *v1.PodSpec, pool *tfv1.GPUPool)
 - func AddTFNodeDiscoveryConfAfterTemplate(ctx context.Context, tmpl *v1.PodTemplateSpec, pool *tfv1.GPUPool, ...)
 - func AddWorkerConfAfterTemplate(ctx context.Context, spec *v1.PodSpec, workerConfig *tfv1.WorkerConfig, ...) string
 - func AppendTFWorkerLabelsAndAnnotationsAfterTemplate(podTmpl *v1.PodTemplate, workload *tfv1.TensorFusionWorkload, ...) (map[string]string, map[string]string)
 - func CalculateExponentialBackoffWithJitter(retryCount int64) time.Duration
 - func CompareAndGetObjectHash(hash string, obj ...any) (bool, string)
 - func CurrentIP() string
 - func CurrentNamespace() string
 - func EqualConditionsDisregardTransitionTime(a, b []metav1.Condition) bool
 - func EscapeJSONPointer(s string) string
 - func ExtractPoolNameFromNodeLabel(node *tfv1.GPUNode) string
 - func FindFirstLevelOwnerReference(obj metav1.Object) *metav1.OwnerReference
 - func FindRootOwnerReference(ctx context.Context, c client.Client, namespace string, obj metav1.Object) (*metav1.OwnerReference, error)
 - func GetEnvOrDefault(key, defaultValue string) string
 - func GetGPUResource(pod *corev1.Pod, isRequest bool) (tfv1.Resource, error)
 - func GetInitialGPUNodeSelector() []string
 - func GetObjectHash(objs ...any) string
 - func GetSelfServiceAccountNameFull() string
 - func GetSelfServiceAccountNameShort() string
 - func HandleFinalizer[T client.Object](ctx context.Context, obj T, r client.Client, ...) (shouldReturn bool, err error)
 - func HasGPUResourceRequest(pod *corev1.Pod) bool
 - func InitServiceAccountConfig()
 - func IsPodConditionTrue(conditions []corev1.PodCondition, conditionType corev1.PodConditionType) bool
 - func IsPodStopped(pod *corev1.Pod) bool
 - func IsProgressiveMigration() bool
 - func IsTensorFusionPod(pod *corev1.Pod) bool
 - func IsTensorFusionWorker(pod *corev1.Pod) bool
 - func LoadConfigFromFile[T any](filename string, target *T) error
 - func NewShortID(length int) string
 - func ReadServiceAccountToken() string
 - func SetProgressiveMigration(isProgressiveMigration bool)
 - func WatchConfigFileChanges(ctx context.Context, filename string) (<-chan []byte, error)
 - type TensorFusionInfo
 
Constants ¶
const ( WatchConfigFileChangesInterval = 15 * time.Second ServiceAccountTokenPath = "/var/run/secrets/kubernetes.io/serviceaccount/token" )
Variables ¶
var ErrNextLoop = errors.New("stop this loop and return the associated Result object")
    ErrNextLoop is not a real error. It forces the current reconciliation loop to stop and return the associated Result object.
var ErrTerminateLoop = errors.New("stop this loop and do not requeue")
    ErrTerminateLoop is not a real error. It forces the current reconciliation loop to stop without requeueing.
var GPUResourceNames = []corev1.ResourceName{
	"nvidia.com/gpu",
	"amd.com/gpu",
}
    var IsTestMode = false
    Functions ¶
func AddOrOverrideTFClientMissingAnnotationsBeforePatch ¶ added in v1.37.0
func AddOrOverrideTFClientMissingAnnotationsBeforePatch(pod *v1.Pod, tfInfo TensorFusionInfo)
func AddTFDefaultClientConfBeforePatch ¶ added in v1.37.0
func AddTFHypervisorConfAfterTemplate ¶ added in v1.37.0
func AddTFNodeDiscoveryConfAfterTemplate ¶ added in v1.37.0
func AddWorkerConfAfterTemplate ¶ added in v1.37.0
func AddWorkerConfAfterTemplate(ctx context.Context, spec *v1.PodSpec, workerConfig *tfv1.WorkerConfig, hypervisorConfig *tfv1.HypervisorConfig, workload *tfv1.TensorFusionWorkload) string
func AppendTFWorkerLabelsAndAnnotationsAfterTemplate ¶ added in v1.37.0
func AppendTFWorkerLabelsAndAnnotationsAfterTemplate( podTmpl *v1.PodTemplate, workload *tfv1.TensorFusionWorkload, containerName string, ) (map[string]string, map[string]string)
func CompareAndGetObjectHash ¶ added in v1.28.0
func CurrentNamespace ¶
func CurrentNamespace() string
func EqualConditionsDisregardTransitionTime ¶ added in v1.35.0
func EscapeJSONPointer ¶ added in v1.26.3
EscapeJSONPointer escapes a string according to the JSON Pointer spec (RFC 6901). It escapes '~' as '~0' and '/' as '~1'.
func ExtractPoolNameFromNodeLabel ¶ added in v1.33.1
func FindFirstLevelOwnerReference ¶ added in v1.35.0
func FindFirstLevelOwnerReference(obj metav1.Object) *metav1.OwnerReference
FindFirstLevelOwnerReference returns the first-level (direct) owner reference of the given object (e.g. Pod). Unlike FindRootOwnerReference, it takes no client and does not walk up the ownership chain.
func FindRootOwnerReference ¶ added in v1.26.9
func FindRootOwnerReference(ctx context.Context, c client.Client, namespace string, obj metav1.Object) (*metav1.OwnerReference, error)
FindRootOwnerReference recursively finds the root owner reference for a given object (e.g. Pod).
func GetEnvOrDefault ¶ added in v1.34.0
func GetGPUResource ¶ added in v1.35.0
func GetInitialGPUNodeSelector ¶ added in v1.43.5
func GetInitialGPUNodeSelector() []string
func GetObjectHash ¶
GetObjectHash generates a shorter FNV-1a hash for one or more objects.
func GetSelfServiceAccountNameFull ¶ added in v1.37.0
func GetSelfServiceAccountNameFull() string
func GetSelfServiceAccountNameShort ¶ added in v1.37.0
func GetSelfServiceAccountNameShort() string
func HandleFinalizer ¶
func HandleFinalizer[T client.Object]( ctx context.Context, obj T, r client.Client, deleteHook func(context.Context, T) (bool, error), ) (shouldReturn bool, err error)
HandleFinalizer ensures proper finalizer management for Kubernetes resources. It automatically adds the finalizer when needed and removes it after successful cleanup. It returns (shouldReturn, err):
- shouldReturn: true if the caller should return immediately and wait for the next reconcile.
- err: any error encountered during the update or in deleteHook.
 
func HasGPUResourceRequest ¶ added in v1.39.0
func InitServiceAccountConfig ¶ added in v1.36.1
func InitServiceAccountConfig()
func IsPodConditionTrue ¶
func IsPodConditionTrue(conditions []corev1.PodCondition, conditionType corev1.PodConditionType) bool
func IsPodStopped ¶ added in v1.37.0
func IsProgressiveMigration ¶ added in v1.39.0
func IsProgressiveMigration() bool
func IsTensorFusionPod ¶ added in v1.39.0
func IsTensorFusionWorker ¶ added in v1.39.1
func LoadConfigFromFile ¶ added in v1.34.0
func NewShortID ¶ added in v1.35.0
func ReadServiceAccountToken ¶ added in v1.36.1
func ReadServiceAccountToken() string
func SetProgressiveMigration ¶ added in v1.39.0
func SetProgressiveMigration(isProgressiveMigration bool)
For testing purposes only.
func WatchConfigFileChanges ¶ added in v1.34.0
WatchConfigFileChanges watches a file for changes and sends the file content through a channel when changes are detected. The channel will receive the raw file content as []byte whenever the file is modified. The watch interval is set to 15 seconds by default.
Types ¶
type TensorFusionInfo ¶ added in v1.37.0
type TensorFusionInfo struct {
	Profile         *tfv1.WorkloadProfileSpec
	DynamicReplicas bool
	EnabledReplicas *int32
	WorkloadName    string
	ContainerNames  []string
	GenWorkload     bool
	// The pod mutating webhook sometimes cannot get the Pod UID,
	// so the pod controller needs to set the owner reference instead.
	PendingSetPodAsOwner bool
}