Documentation
¶
Index ¶
- Constants
- Variables
- func AddEntrypoint(entrypoint string, podTemplateSpec *corev1.PodTemplateSpec) error
- func CheckKaiwoWorkloadShouldBeTerminatedForUnderutilization(ctx context.Context, workload common.KaiwoWorkload) (bool, string)
- func CheckPodStatus(ctx context.Context, k8sClient client.Client, name string, namespace string, ...) (lastStartTime *metav1.Time, status kaiwo.Status, err error)
- func ClusterHasGpuDemand(ctx context.Context, k8sClient client.Client, clusterQueue string, ...) (bool, error)
- func DeleteUnderlyingResources(ctx context.Context, uid types.UID, name string, namespace string, ...) error
- func FillPodResources(podSpec *corev1.PodSpec, resources *corev1.ResourceRequirements, override bool)
- func GetEarliestPodStartTime(ctx context.Context, k8sClient client.Client, name string, namespace string) *metav1.Time
- func GetPodTemplate(config controllerutils.KaiwoConfigContext, dshmSize resource.Quantity, ...) corev1.PodTemplateSpec
- func GetRayClusterTemplate(config controllerutils.KaiwoConfigContext, dangerous bool, ...) *rayv1.RayClusterSpec
- func GetWorkloadPods(ctx context.Context, k8sClient client.Client, workload common.KaiwoWorkload) ([]corev1.Pod, error)
- func RetryForWorkload[T common.KaiwoWorkload](ctx context.Context, k8sClient client.Client, workload T, fn func(T) error) error
- func SetEarlyTermination(ctx context.Context, k8sClient client.Client, workload common.KaiwoWorkload, ...) error
- func ShouldPreempt(ctx context.Context, obj common.KaiwoWorkload, k8sClient client.Client) bool
- func SyncGpuMetaFromPodSpec(podSpec corev1.PodSpec, meta *kaiwo.CommonMetaSpec)
- func TerminateWorkload(ctx context.Context, k8sClient client.Client, recorder record.EventRecorder, ...) error
- func ToPascalCase(s string) string
- func UpdatePodSpec(config controllerutils.KaiwoConfigContext, ...) error
- func ValidateKaiwoResourceBeforeCreateOrUpdate(ctx context.Context, actual client.Object, kaiwoObjectMeta metav1.ObjectMeta) (*ctrl.Result, error)
- type DownloadJobConfigMapReconciler
- type DownloadJobReconciler
- type WorkloadPreemptedReason
- type WorkloadTerminationReason
Constants ¶
const (
KaiwoDownloadTypeLabelValue = "downloader"
)
const WorkloadEarlyTerminationConditionType = "WorkloadTerminatedEarly"
Variables ¶
var (
DefaultMemory = resource.MustParse("16Gi")
DefaultCPU = resource.MustParse("2")
)
Functions ¶
func AddEntrypoint ¶
func AddEntrypoint(entrypoint string, podTemplateSpec *corev1.PodTemplateSpec) error
AddEntrypoint updates the entrypoint command in the PodTemplateSpec.
func CheckKaiwoWorkloadShouldBeTerminatedForUnderutilization ¶ added in v0.1.5
func CheckKaiwoWorkloadShouldBeTerminatedForUnderutilization(ctx context.Context, workload common.KaiwoWorkload) (bool, string)
CheckKaiwoWorkloadShouldBeTerminatedForUnderutilization checks whether the Kaiwo workload should be terminated due to resource underutilization.
func CheckPodStatus ¶
func ClusterHasGpuDemand ¶ added in v0.1.4
func ClusterHasGpuDemand(ctx context.Context, k8sClient client.Client, clusterQueue string, gpuVendor string, config controllerutils.KaiwoConfigContext) (bool, error)
func DeleteUnderlyingResources ¶ added in v0.1.5
func DeleteUnderlyingResources(ctx context.Context, uid types.UID, name string, namespace string, k8sClient client.Client) error
DeleteUnderlyingResources deletes all the underlying resources that a workload owns
func FillPodResources ¶
func FillPodResources(podSpec *corev1.PodSpec, resources *corev1.ResourceRequirements, override bool)
FillPodResources fills pod resources with a given template if they are not already set
func GetEarliestPodStartTime ¶
func GetPodTemplate ¶
func GetPodTemplate(config controllerutils.KaiwoConfigContext, dshmSize resource.Quantity, dangerous bool, resources corev1.ResourceRequirements, workloadContainerName string) corev1.PodTemplateSpec
func GetRayClusterTemplate ¶
func GetRayClusterTemplate(config controllerutils.KaiwoConfigContext, dangerous bool, resourceRequirements v1.ResourceRequirements) *rayv1.RayClusterSpec
func GetWorkloadPods ¶ added in v0.1.5
func RetryForWorkload ¶ added in v0.1.5
func RetryForWorkload[T common.KaiwoWorkload](ctx context.Context, k8sClient client.Client, workload T, fn func(T) error) error
RetryForWorkload provides a wrapper for an atomic action on a workload object: it first reads a fresh copy of the object, passes it to the function, and retries on conflict.
func SetEarlyTermination ¶ added in v0.1.5
func SetEarlyTermination(ctx context.Context, k8sClient client.Client, workload common.KaiwoWorkload, reason string, message string) error
SetEarlyTermination flags a workload for early termination by (1) setting the status to TERMINATING and (2) creating the WorkloadTerminatedEarly condition, but keeping its status as False (in order to record the reason).
func ShouldPreempt ¶ added in v0.1.4
func SyncGpuMetaFromPodSpec ¶ added in v0.1.4
func SyncGpuMetaFromPodSpec(podSpec corev1.PodSpec, meta *kaiwo.CommonMetaSpec)
func TerminateWorkload ¶ added in v0.1.5
func TerminateWorkload( ctx context.Context, k8sClient client.Client, recorder record.EventRecorder, workload common.KaiwoWorkload, ) error
TerminateWorkload terminates a given workload by deleting all the child objects, setting an early termination condition, and emitting an event.
func ToPascalCase ¶ added in v0.1.4
ToPascalCase transforms a string like "hello there" into "HelloThere".
func UpdatePodSpec ¶
func UpdatePodSpec(config controllerutils.KaiwoConfigContext, kaiwoCommonMetaSpec kaiwo.CommonMetaSpec, labelContext common.KaiwoLabelContext, template *corev1.PodTemplateSpec, name string, replicas int, gpusPerReplica int, override bool, rayhead bool) error
Types ¶
type DownloadJobConfigMapReconciler ¶
type DownloadJobConfigMapReconciler struct {
common.ResourceReconcilerBase[*corev1.ConfigMap]
StorageSpec *v1alpha1.StorageSpec
}
func NewDownloadJobConfigMapReconciler ¶
func NewDownloadJobConfigMapReconciler(objectKey client.ObjectKey, storageSpec *v1alpha1.StorageSpec) *DownloadJobConfigMapReconciler
func (*DownloadJobConfigMapReconciler) GetEmptyObject ¶
func (r *DownloadJobConfigMapReconciler) GetEmptyObject() *corev1.ConfigMap
type DownloadJobReconciler ¶
type DownloadJobReconciler struct {
common.ResourceReconcilerBase[*batchv1.Job]
StorageSpec *v1alpha1.StorageSpec
PvcBaseName string
UserEnvVars []corev1.EnvVar
}
func NewDownloadJobReconciler ¶
func NewDownloadJobReconciler(objectKey client.ObjectKey, storageSpec *v1alpha1.StorageSpec, pvcBaseName string, userEnvVars []corev1.EnvVar) *DownloadJobReconciler
func (*DownloadJobReconciler) GetEmptyObject ¶
func (r *DownloadJobReconciler) GetEmptyObject() *batchv1.Job
func (*DownloadJobReconciler) ShouldContinue ¶
type WorkloadPreemptedReason ¶ added in v0.1.5
type WorkloadPreemptedReason string
const WorkloadPreempted WorkloadPreemptedReason = "WorkloadPreempted"
type WorkloadTerminationReason ¶ added in v0.1.5
type WorkloadTerminationReason string