Documentation
¶
Overview ¶
Package v2beta1 is the v2beta1 version of the API. +groupName=kubeflow.org
Index ¶
- Constants
- Variables
- func GetOpenAPIDefinitions(ref common.ReferenceCallback) map[string]common.OpenAPIDefinition
- func RegisterDefaults(scheme *runtime.Scheme) error
- func Resource(resource string) schema.GroupResource
- func SetDefaults_MPIJob(mpiJob *MPIJob)
- func SetObjectDefaults_MPIJob(in *MPIJob)
- func SetObjectDefaults_MPIJobList(in *MPIJobList)
- type CleanPodPolicy
- type JobCondition
- type JobConditionType
- type JobStatus
- type MPIImplementation
- type MPIJob
- type MPIJobList
- type MPIJobSpec
- type MPIReplicaType
- type ReplicaStatus
- type RunPolicy
- type SchedulingPolicy
Constants ¶
const ( // EnvKubeflowNamespace is ENV for kubeflow namespace specified by user. EnvKubeflowNamespace = "KUBEFLOW_NAMESPACE" // DefaultRestartPolicy is default RestartPolicy for ReplicaSpec. DefaultRestartPolicy = common.RestartPolicyNever // DefaultLauncherRestartPolicy is default RestartPolicy for Launcher Job. DefaultLauncherRestartPolicy = common.RestartPolicyOnFailure // OperatorName is the name of the operator used as value to the label common.OperatorLabelName OperatorName = "mpi-operator" )
// Identifiers of the API group, kind and version served by this package.
const (
	// GroupName is the group name used in this package.
	GroupName = "kubeflow.org"
	// Kind is the kind name.
	Kind = "MPIJob"
	// GroupVersion is the version.
	GroupVersion = "v2beta1"
)
Variables ¶
var ( SchemeBuilder = runtime.NewSchemeBuilder(addKnownTypes, addDefaultingFuncs) AddToScheme = SchemeBuilder.AddToScheme SchemeGroupVersion = schema.GroupVersion{Group: GroupName, Version: GroupVersion} SchemeGroupVersionKind = schema.GroupVersionKind{Group: GroupName, Version: GroupVersion, Kind: Kind} )
Functions ¶
func GetOpenAPIDefinitions ¶
func GetOpenAPIDefinitions(ref common.ReferenceCallback) map[string]common.OpenAPIDefinition
func RegisterDefaults ¶
RegisterDefaults adds defaulter functions to the given scheme. It is public to allow building arbitrary schemes. All generated defaulters are covering: they call all nested defaulters.
func Resource ¶
func Resource(resource string) schema.GroupResource
Resource takes an unqualified resource and returns a Group qualified GroupResource.
func SetDefaults_MPIJob ¶
func SetDefaults_MPIJob(mpiJob *MPIJob)
func SetObjectDefaults_MPIJob ¶
func SetObjectDefaults_MPIJob(in *MPIJob)
func SetObjectDefaults_MPIJobList ¶
func SetObjectDefaults_MPIJobList(in *MPIJobList)
Types ¶
type CleanPodPolicy ¶
type CleanPodPolicy string
CleanPodPolicy describes how to deal with pods when the job is finished.
const ( CleanPodPolicyUndefined CleanPodPolicy = "" CleanPodPolicyAll CleanPodPolicy = "All" CleanPodPolicyRunning CleanPodPolicy = "Running" CleanPodPolicyNone CleanPodPolicy = "None" )
type JobCondition ¶
// JobCondition describes the state of the job at a certain point.
type JobCondition struct {
	// Type is the type of the job condition.
	Type JobConditionType `json:"type"`
	// Status is the status of the condition, one of True, False, Unknown.
	// +kubebuilder:validation:Enum:=True;False;Unknown
	Status v1.ConditionStatus `json:"status"`
	// Reason is the reason for the condition's last transition.
	// +optional
	Reason string `json:"reason,omitempty"`
	// Message is a human-readable message indicating details about the transition.
	// +optional
	Message string `json:"message,omitempty"`
	// LastUpdateTime is the last time this condition was updated.
	// +optional
	LastUpdateTime metav1.Time `json:"lastUpdateTime,omitempty"`
	// LastTransitionTime is the last time the condition transitioned from one
	// status to another.
	// +optional
	LastTransitionTime metav1.Time `json:"lastTransitionTime,omitempty"`
}
JobCondition describes the state of the job at a certain point.
func (*JobCondition) DeepCopy ¶
func (in *JobCondition) DeepCopy() *JobCondition
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new JobCondition.
func (*JobCondition) DeepCopyInto ¶
func (in *JobCondition) DeepCopyInto(out *JobCondition)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type JobConditionType ¶
type JobConditionType string
JobConditionType defines the possible types of a JobCondition.
const ( // JobCreated means the job has been accepted by the system, // but one or more of the pods/services has not been started. // This includes time before pods being scheduled and launched. JobCreated JobConditionType = "Created" // JobRunning means all sub-resources (e.g. services/pods) of this job // have been successfully scheduled and launched. // The training is running without error. JobRunning JobConditionType = "Running" // JobRestarting means one or more sub-resources (e.g. services/pods) of this job // reached phase failed but maybe restarted according to it's restart policy // which specified by user in v1.PodTemplateSpec. // The training is freezing/pending. JobRestarting JobConditionType = "Restarting" // JobSucceeded means all sub-resources (e.g. services/pods) of this job // reached phase have terminated in success. // The training is complete without error. JobSucceeded JobConditionType = "Succeeded" // JobSuspended means the job has been suspended. JobSuspended JobConditionType = "Suspended" // JobFailed means one or more sub-resources (e.g. services/pods) of this job // reached phase failed with no restarting. // The training has failed its execution. JobFailed JobConditionType = "Failed" )
type JobStatus ¶
// JobStatus represents the current observed state of the training Job.
type JobStatus struct {
	// Conditions is a list of current observed job conditions.
	// +optional
	// +listType=map
	// +listMapKey=type
	Conditions []JobCondition `json:"conditions,omitempty"`
	// ReplicaStatuses is a map from MPIReplicaType to ReplicaStatus that
	// specifies the status of each replica.
	// +optional
	ReplicaStatuses map[MPIReplicaType]*ReplicaStatus `json:"replicaStatuses,omitempty"`
	// StartTime represents the time when the job was acknowledged by the job controller.
	// It is not guaranteed to be set in happens-before order across separate operations.
	// It is represented in RFC3339 form and is in UTC.
	// +optional
	StartTime *metav1.Time `json:"startTime,omitempty"`
	// CompletionTime represents the time when the job was completed. It is not
	// guaranteed to be set in happens-before order across separate operations.
	// It is represented in RFC3339 form and is in UTC.
	// +optional
	CompletionTime *metav1.Time `json:"completionTime,omitempty"`
	// LastReconcileTime represents the last time the job was reconciled. It is not
	// guaranteed to be set in happens-before order across separate operations.
	// It is represented in RFC3339 form and is in UTC.
	// +optional
	LastReconcileTime *metav1.Time `json:"lastReconcileTime,omitempty"`
}
JobStatus represents the current observed state of the training Job.
func (*JobStatus) DeepCopy ¶
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new JobStatus.
func (*JobStatus) DeepCopyInto ¶
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type MPIImplementation ¶
type MPIImplementation string
const ( MPIImplementationOpenMPI MPIImplementation = "OpenMPI" MPIImplementationIntel MPIImplementation = "Intel" )
type MPIJob ¶
// MPIJob is the API object of kind "MPIJob": embedded type and object
// metadata together with the job's Spec and Status.
type MPIJob struct {
	metav1.TypeMeta `json:",inline"`
	metav1.ObjectMeta `json:"metadata,omitempty"`
	// Spec holds the job's MPIJobSpec.
	Spec MPIJobSpec `json:"spec,omitempty"`
	// Status holds the job's JobStatus, the current observed state of the job.
	Status JobStatus `json:"status,omitempty"`
}
func (*MPIJob) DeepCopy ¶
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MPIJob.
func (*MPIJob) DeepCopyInto ¶
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*MPIJob) DeepCopyObject ¶
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type MPIJobList ¶
// MPIJobList is a list of MPIJob objects with embedded type and list metadata.
type MPIJobList struct {
	metav1.TypeMeta `json:",inline"`
	metav1.ListMeta `json:"metadata"`
	// Items is the list of MPIJob objects.
	Items []MPIJob `json:"items"`
}
func (*MPIJobList) DeepCopy ¶
func (in *MPIJobList) DeepCopy() *MPIJobList
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MPIJobList.
func (*MPIJobList) DeepCopyInto ¶
func (in *MPIJobList) DeepCopyInto(out *MPIJobList)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*MPIJobList) DeepCopyObject ¶
func (in *MPIJobList) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type MPIJobSpec ¶
// MPIJobSpec is the specification of an MPIJob: slots per worker, run policy,
// the replica specs to run, the SSH key mount path and the MPI implementation.
type MPIJobSpec struct {
	// SlotsPerWorker specifies the number of slots per worker used in the hostfile.
	// Defaults to 1.
	// +optional
	// +kubebuilder:default:=1
	SlotsPerWorker *int32 `json:"slotsPerWorker,omitempty"`
	// RunPolicy encapsulates various runtime policies of the job.
	RunPolicy RunPolicy `json:"runPolicy,omitempty"`
	// MPIReplicaSpecs maps each `MPIReplicaType` to the `ReplicaSpec` that
	// specifies the MPI replicas to run.
	MPIReplicaSpecs map[MPIReplicaType]*common.ReplicaSpec `json:"mpiReplicaSpecs"`
	// SSHAuthMountPath is the directory where SSH keys are mounted.
	// Defaults to "/root/.ssh".
	// +kubebuilder:default:="/root/.ssh"
	SSHAuthMountPath string `json:"sshAuthMountPath,omitempty"`
	// MPIImplementation is the MPI implementation.
	// Options are "OpenMPI" (default) and "Intel".
	// +kubebuilder:validation:Enum:=OpenMPI;Intel
	// +kubebuilder:default:=OpenMPI
	MPIImplementation MPIImplementation `json:"mpiImplementation,omitempty"`
}
func (*MPIJobSpec) DeepCopy ¶
func (in *MPIJobSpec) DeepCopy() *MPIJobSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MPIJobSpec.
func (*MPIJobSpec) DeepCopyInto ¶
func (in *MPIJobSpec) DeepCopyInto(out *MPIJobSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type MPIReplicaType ¶
type MPIReplicaType string
MPIReplicaType is the type for MPIReplica.
const ( // MPIReplicaTypeLauncher is the type for launcher replica. MPIReplicaTypeLauncher MPIReplicaType = "Launcher" // MPIReplicaTypeWorker is the type for worker replicas. MPIReplicaTypeWorker MPIReplicaType = "Worker" )
type ReplicaStatus ¶
// ReplicaStatus represents the current observed state of the replica.
type ReplicaStatus struct {
	// Active is the number of actively running pods.
	// +optional
	Active int32 `json:"active,omitempty"`
	// Succeeded is the number of pods which reached phase succeeded.
	// +optional
	Succeeded int32 `json:"succeeded,omitempty"`
	// Failed is the number of pods which reached phase failed.
	// +optional
	Failed int32 `json:"failed,omitempty"`
	// Deprecated: Use Selector instead.
	// +optional
	LabelSelector *metav1.LabelSelector `json:"labelSelector,omitempty"`
	// Selector is a label query over a set of resources, held as a string.
	// The results of matchLabels and matchExpressions are ANDed. An empty
	// selector matches all objects. A null selector matches no objects.
	// +optional
	Selector string `json:"selector,omitempty"`
}
ReplicaStatus represents the current observed state of the replica.
func (*ReplicaStatus) DeepCopy ¶
func (in *ReplicaStatus) DeepCopy() *ReplicaStatus
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ReplicaStatus.
func (*ReplicaStatus) DeepCopyInto ¶
func (in *ReplicaStatus) DeepCopyInto(out *ReplicaStatus)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type RunPolicy ¶
// RunPolicy encapsulates various runtime policies of the distributed training
// job, for example how to clean up resources and how long the job can stay active.
type RunPolicy struct {
	// CleanPodPolicy defines the policy to kill pods after the job completes.
	// Defaults to Running.
	// NOTE(review): the defaulting code (SetDefaults_MPIJob) is not shown here;
	// confirm that this documented default matches it.
	CleanPodPolicy *CleanPodPolicy `json:"cleanPodPolicy,omitempty"`
	// TTLSecondsAfterFinished is the TTL to clean up jobs.
	// It may take extra ReconcilePeriod seconds for the cleanup, since
	// reconcile gets called periodically.
	// Defaults to infinite.
	TTLSecondsAfterFinished *int32 `json:"ttlSecondsAfterFinished,omitempty"`
	// ActiveDeadlineSeconds specifies the duration in seconds, relative to the
	// startTime, that the job may be active before the system tries to
	// terminate it; the value must be a positive integer.
	// +optional
	ActiveDeadlineSeconds *int64 `json:"activeDeadlineSeconds,omitempty"`
	// BackoffLimit is the optional number of retries before marking this job failed.
	// +optional
	BackoffLimit *int32 `json:"backoffLimit,omitempty"`
	// SchedulingPolicy defines the policy related to scheduling, e.g. gang-scheduling.
	// +optional
	SchedulingPolicy *SchedulingPolicy `json:"schedulingPolicy,omitempty"`
	// Suspend specifies whether the MPIJob controller should create Pods or not.
	// If an MPIJob is created with suspend set to true, no Pods are created by
	// the MPIJob controller. If an MPIJob is suspended after creation (i.e. the
	// flag goes from false to true), the MPIJob controller will delete all
	// active Pods and PodGroups associated with this MPIJob. It will also suspend
	// the Launcher Job. Users must design their workload to gracefully handle this.
	// Suspending a Job will reset the StartTime field of the MPIJob.
	//
	// Defaults to false.
	// +kubebuilder:default:=false
	Suspend *bool `json:"suspend,omitempty"`
}
RunPolicy encapsulates various runtime policies of the distributed training job, for example how to clean up resources and how long the job can stay active.
func (*RunPolicy) DeepCopy ¶
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RunPolicy.
func (*RunPolicy) DeepCopyInto ¶
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type SchedulingPolicy ¶
// SchedulingPolicy encapsulates various scheduling policies of the distributed
// training job, for example `minAvailable` for gang-scheduling.
type SchedulingPolicy struct {
	// MinAvailable defines the minimum number of members needed to run the PodGroup.
	// If the gang-scheduling setting isn't empty, the input is passed to
	// `.spec.minMember` in PodGroup.
	// Note that, when using this field,
	// you need to make sure the application supports resizing (e.g., Elastic Horovod).
	//
	// If not set, it defaults to the number of workers.
	// +optional
	MinAvailable *int32 `json:"minAvailable,omitempty"`
	// Queue defines the queue name used to allocate resources for the PodGroup.
	// If gang-scheduling is set to volcano,
	// the input is passed to `.spec.queue` in PodGroup for volcano,
	// and if it is set to scheduler-plugins,
	// the input isn't passed to PodGroup.
	// +optional
	Queue string `json:"queue,omitempty"`
	// MinResources defines the minimum resources of members needed to run the PodGroup.
	// If the gang-scheduling setting isn't empty,
	// the input is passed to `.spec.minResources` in PodGroup for scheduler-plugins.
	// +optional
	MinResources *v1.ResourceList `json:"minResources,omitempty"`
	// PriorityClass defines the PodGroup's PriorityClass.
	// If gang-scheduling is set to volcano,
	// the input is passed to `.spec.priorityClassName` in PodGroup for volcano,
	// and if it is set to scheduler-plugins,
	// the input isn't passed to PodGroup for scheduler-plugins.
	// +optional
	PriorityClass string `json:"priorityClass,omitempty"`
	// ScheduleTimeoutSeconds defines the maximum time members wait before the
	// PodGroup is run.
	// If gang-scheduling is set to scheduler-plugins,
	// the input is passed to `.spec.scheduleTimeoutSeconds` in PodGroup for scheduler-plugins,
	// and if it is set to volcano, the input isn't passed to PodGroup.
	// +optional
	ScheduleTimeoutSeconds *int32 `json:"scheduleTimeoutSeconds,omitempty"`
}
SchedulingPolicy encapsulates various scheduling policies of the distributed training job, for example `minAvailable` for gang-scheduling. Currently, only volcano and scheduler-plugins are supported.
func (*SchedulingPolicy) DeepCopy ¶
func (in *SchedulingPolicy) DeepCopy() *SchedulingPolicy
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulingPolicy.
func (*SchedulingPolicy) DeepCopyInto ¶
func (in *SchedulingPolicy) DeepCopyInto(out *SchedulingPolicy)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.