 Documentation
      ¶
      Documentation
      ¶
    
    
  
    
  
    Overview ¶
Package v2beta1 is the v2beta1 version of the API for the kubeflow.org API group (+groupName=kubeflow.org).
Index ¶
- Constants
- Variables
- func GetOpenAPIDefinitions(ref common.ReferenceCallback) map[string]common.OpenAPIDefinition
- func RegisterDefaults(scheme *runtime.Scheme) error
- func Resource(resource string) schema.GroupResource
- func SetDefaults_MPIJob(mpiJob *MPIJob)
- func SetObjectDefaults_MPIJob(in *MPIJob)
- func SetObjectDefaults_MPIJobList(in *MPIJobList)
- type CleanPodPolicy
- type JobCondition
- type JobConditionType
- type JobStatus
- type MPIImplementation
- type MPIJob
- type MPIJobList
- type MPIJobSpec
- type MPIReplicaType
- type ReplicaStatus
- type RunPolicy
- type SchedulingPolicy
Constants ¶
const ( // EnvKubeflowNamespace is ENV for kubeflow namespace specified by user. EnvKubeflowNamespace = "KUBEFLOW_NAMESPACE" // DefaultRestartPolicy is default RestartPolicy for ReplicaSpec. DefaultRestartPolicy = common.RestartPolicyNever // DefaultLauncherRestartPolicy is default RestartPolicy for Launcher Job. DefaultLauncherRestartPolicy = common.RestartPolicyOnFailure // OperatorName is the name of the operator used as value to the label common.OperatorLabelName OperatorName = "mpi-operator" )
// Identifiers of the API group, kind and version served by this package.
const (
	// GroupName is the group name used in this package.
	GroupName = "kubeflow.org"
	// Kind is the kind name.
	Kind = "MPIJob"
	// GroupVersion is the version.
	GroupVersion = "v2beta1"
)
Variables ¶
var ( SchemeBuilder = runtime.NewSchemeBuilder(addKnownTypes, addDefaultingFuncs) AddToScheme = SchemeBuilder.AddToScheme SchemeGroupVersion = schema.GroupVersion{Group: GroupName, Version: GroupVersion} SchemeGroupVersionKind = schema.GroupVersionKind{Group: GroupName, Version: GroupVersion, Kind: Kind} )
Functions ¶
func GetOpenAPIDefinitions ¶
func GetOpenAPIDefinitions(ref common.ReferenceCallback) map[string]common.OpenAPIDefinition
func RegisterDefaults ¶
RegisterDefaults adds defaulter functions to the given scheme. It is public to allow building arbitrary schemes. All generated defaulters are covering: they call all nested defaulters.
func Resource ¶
func Resource(resource string) schema.GroupResource
Resource takes an unqualified resource and returns a Group qualified GroupResource.
func SetDefaults_MPIJob ¶
func SetDefaults_MPIJob(mpiJob *MPIJob)
func SetObjectDefaults_MPIJob ¶
func SetObjectDefaults_MPIJob(in *MPIJob)
func SetObjectDefaults_MPIJobList ¶
func SetObjectDefaults_MPIJobList(in *MPIJobList)
Types ¶
type CleanPodPolicy ¶
// CleanPodPolicy describes how to deal with pods when the job is finished.
type CleanPodPolicy string

// Declared CleanPodPolicy values. CleanPodPolicyUndefined is the empty string,
// i.e. the zero value of the type.
const (
	CleanPodPolicyUndefined CleanPodPolicy = ""
	CleanPodPolicyAll       CleanPodPolicy = "All"
	CleanPodPolicyRunning   CleanPodPolicy = "Running"
	CleanPodPolicyNone      CleanPodPolicy = "None"
)
type JobCondition ¶
// JobCondition describes the state of the job at a certain point.
type JobCondition struct {
	// type of the job condition, expressed as a JobConditionType.
	Type JobConditionType `json:"type"`
	// status of the condition, one of True, False, Unknown.
	// +kubebuilder:validation:Enum:=True;False;Unknown
	Status v1.ConditionStatus `json:"status"`
	// The reason for the condition's last transition.
	// +optional
	Reason string `json:"reason,omitempty"`
	// A human-readable message indicating details about the transition.
	// +optional
	Message string `json:"message,omitempty"`
	// The last time this condition was updated.
	// +optional
	LastUpdateTime metav1.Time `json:"lastUpdateTime,omitempty"`
	// The last time the condition transitioned from one status to another.
	// +optional
	LastTransitionTime metav1.Time `json:"lastTransitionTime,omitempty"`
}
    JobCondition describes the state of the job at a certain point.
func (*JobCondition) DeepCopy ¶
func (in *JobCondition) DeepCopy() *JobCondition
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new JobCondition.
func (*JobCondition) DeepCopyInto ¶
func (in *JobCondition) DeepCopyInto(out *JobCondition)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type JobConditionType ¶
// JobConditionType identifies the type of a JobCondition.
type JobConditionType string

const (
	// JobCreated means the job has been accepted by the system,
	// but one or more of the pods/services has not been started.
	// This includes the time before pods are scheduled and launched.
	JobCreated JobConditionType = "Created"

	// JobRunning means all sub-resources (e.g. services/pods) of this job
	// have been successfully scheduled and launched.
	// The training is running without error.
	JobRunning JobConditionType = "Running"

	// JobRestarting means one or more sub-resources (e.g. services/pods) of this job
	// reached phase failed but may be restarted according to its restart policy,
	// which is specified by the user in v1.PodTemplateSpec.
	// The training is freezing/pending.
	JobRestarting JobConditionType = "Restarting"

	// JobSucceeded means all sub-resources (e.g. services/pods) of this job
	// have terminated successfully.
	// The training is complete without error.
	JobSucceeded JobConditionType = "Succeeded"

	// JobSuspended means the job has been suspended.
	JobSuspended JobConditionType = "Suspended"

	// JobFailed means one or more sub-resources (e.g. services/pods) of this job
	// reached phase failed with no restarting.
	// The training has failed its execution.
	JobFailed JobConditionType = "Failed"
)
type JobStatus ¶
// JobStatus represents the current observed state of the training Job.
type JobStatus struct {
	// conditions is a list of current observed job conditions.
	// +optional
	// +listType=map
	// +listMapKey=type
	Conditions []JobCondition `json:"conditions,omitempty"`
	// replicaStatuses is a map from MPIReplicaType to ReplicaStatus that
	// specifies the status of each replica.
	// +optional
	ReplicaStatuses map[MPIReplicaType]*ReplicaStatus `json:"replicaStatuses,omitempty"`
	// Represents the time when the job was acknowledged by the job controller.
	// It is not guaranteed to be set in happens-before order across separate operations.
	// It is represented in RFC3339 form and is in UTC.
	// +optional
	StartTime *metav1.Time `json:"startTime,omitempty"`
	// Represents the time when the job was completed. It is not guaranteed to
	// be set in happens-before order across separate operations.
	// It is represented in RFC3339 form and is in UTC.
	// +optional
	CompletionTime *metav1.Time `json:"completionTime,omitempty"`
	// Represents the last time when the job was reconciled. It is not guaranteed to
	// be set in happens-before order across separate operations.
	// It is represented in RFC3339 form and is in UTC.
	// +optional
	LastReconcileTime *metav1.Time `json:"lastReconcileTime,omitempty"`
}
    JobStatus represents the current observed state of the training Job.
func (*JobStatus) DeepCopy ¶
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new JobStatus.
func (*JobStatus) DeepCopyInto ¶
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type MPIImplementation ¶
// MPIImplementation names an MPI implementation; it is the type of the
// MPIJobSpec.MPIImplementation field.
type MPIImplementation string

// Declared MPIImplementation values.
const (
	MPIImplementationOpenMPI MPIImplementation = "OpenMPI"
	MPIImplementationIntel   MPIImplementation = "Intel"
)
type MPIJob ¶
// MPIJob is the API object of this package. It embeds the standard type and
// object metadata and carries an MPIJobSpec in Spec and a JobStatus (the
// observed state of the job) in Status.
type MPIJob struct {
	metav1.TypeMeta   `json:",inline"`
	metav1.ObjectMeta `json:"metadata,omitempty"`
	Spec              MPIJobSpec `json:"spec,omitempty"`
	Status            JobStatus  `json:"status,omitempty"`
}
    func (*MPIJob) DeepCopy ¶
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MPIJob.
func (*MPIJob) DeepCopyInto ¶
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*MPIJob) DeepCopyObject ¶
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type MPIJobList ¶
// MPIJobList is a list of MPIJob objects. It embeds the standard type and
// list metadata and holds the jobs themselves in Items.
type MPIJobList struct {
	metav1.TypeMeta `json:",inline"`
	metav1.ListMeta `json:"metadata"`
	Items           []MPIJob `json:"items"`
}
    func (*MPIJobList) DeepCopy ¶
func (in *MPIJobList) DeepCopy() *MPIJobList
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MPIJobList.
func (*MPIJobList) DeepCopyInto ¶
func (in *MPIJobList) DeepCopyInto(out *MPIJobList)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*MPIJobList) DeepCopyObject ¶
func (in *MPIJobList) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type MPIJobSpec ¶
// MPIJobSpec is the type of the Spec field of an MPIJob.
type MPIJobSpec struct {
	// Specifies the number of slots per worker used in the hostfile.
	// Defaults to 1.
	// +optional
	// +kubebuilder:default:=1
	SlotsPerWorker *int32 `json:"slotsPerWorker,omitempty"`
	// RunPolicy encapsulates various runtime policies of the job.
	RunPolicy RunPolicy `json:"runPolicy,omitempty"`
	// MPIReplicaSpecs is a map from `MPIReplicaType` to `ReplicaSpec` that
	// specifies the MPI replicas to run.
	MPIReplicaSpecs map[MPIReplicaType]*common.ReplicaSpec `json:"mpiReplicaSpecs"`
	// SSHAuthMountPath is the directory where SSH keys are mounted.
	// Defaults to "/root/.ssh".
	// +kubebuilder:default:="/root/.ssh"
	SSHAuthMountPath string `json:"sshAuthMountPath,omitempty"`
	// MPIImplementation is the MPI implementation.
	// Options are "OpenMPI" (default) and "Intel".
	// +kubebuilder:validation:Enum:=OpenMPI;Intel
	// +kubebuilder:default:=OpenMPI
	MPIImplementation MPIImplementation `json:"mpiImplementation,omitempty"`
}
    func (*MPIJobSpec) DeepCopy ¶
func (in *MPIJobSpec) DeepCopy() *MPIJobSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MPIJobSpec.
func (*MPIJobSpec) DeepCopyInto ¶
func (in *MPIJobSpec) DeepCopyInto(out *MPIJobSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type MPIReplicaType ¶
// MPIReplicaType is the type for MPIReplica.
type MPIReplicaType string

const (
	// MPIReplicaTypeLauncher is the type for the launcher replica.
	MPIReplicaTypeLauncher MPIReplicaType = "Launcher"

	// MPIReplicaTypeWorker is the type for worker replicas.
	MPIReplicaTypeWorker MPIReplicaType = "Worker"
)
type ReplicaStatus ¶
// ReplicaStatus represents the current observed state of the replica.
type ReplicaStatus struct {
	// The number of actively running pods.
	// +optional
	Active int32 `json:"active,omitempty"`
	// The number of pods which reached phase succeeded.
	// +optional
	Succeeded int32 `json:"succeeded,omitempty"`
	// The number of pods which reached phase failed.
	// +optional
	Failed int32 `json:"failed,omitempty"`
	// Deprecated: Use selector instead.
	// +optional
	LabelSelector *metav1.LabelSelector `json:"labelSelector,omitempty"`

	// NOTE(review): Selector is declared as a plain string; presumably it holds
	// the serialized form of a label selector — confirm against the controller.

	// A selector is a label query over a set of resources. The result of matchLabels and
	// matchExpressions are ANDed. An empty selector matches all objects. A null
	// selector matches no objects.
	// +optional
	Selector string `json:"selector,omitempty"`
}
    ReplicaStatus represents the current observed state of the replica.
func (*ReplicaStatus) DeepCopy ¶
func (in *ReplicaStatus) DeepCopy() *ReplicaStatus
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ReplicaStatus.
func (*ReplicaStatus) DeepCopyInto ¶
func (in *ReplicaStatus) DeepCopyInto(out *ReplicaStatus)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type RunPolicy ¶
// RunPolicy encapsulates various runtime policies of the distributed training
// job, for example how to clean up resources and how long the job can stay
// active.
type RunPolicy struct {
	// NOTE(review): the default documented for CleanPodPolicy cannot be verified
	// from this declaration; confirm it against SetDefaults_MPIJob.

	// CleanPodPolicy defines the policy to kill pods after the job completes.
	// Defaults to Running.
	CleanPodPolicy *CleanPodPolicy `json:"cleanPodPolicy,omitempty"`
	// TTLSecondsAfterFinished is the TTL to clean up jobs.
	// It may take extra ReconcilePeriod seconds for the cleanup, since
	// reconcile gets called periodically.
	// Defaults to infinite.
	TTLSecondsAfterFinished *int32 `json:"ttlSecondsAfterFinished,omitempty"`
	// Specifies the duration in seconds relative to the startTime that the job may be active
	// before the system tries to terminate it; value must be a positive integer.
	// +optional
	ActiveDeadlineSeconds *int64 `json:"activeDeadlineSeconds,omitempty"`
	// Optional number of retries before marking this job failed.
	// +optional
	BackoffLimit *int32 `json:"backoffLimit,omitempty"`
	// SchedulingPolicy defines the policy related to scheduling, e.g. gang-scheduling.
	// +optional
	SchedulingPolicy *SchedulingPolicy `json:"schedulingPolicy,omitempty"`
	// suspend specifies whether the MPIJob controller should create Pods or not.
	// If an MPIJob is created with suspend set to true, no Pods are created by
	// the MPIJob controller. If an MPIJob is suspended after creation (i.e. the
	// flag goes from false to true), the MPIJob controller will delete all
	// active Pods and PodGroups associated with this MPIJob. Also, it will suspend the
	// Launcher Job. Users must design their workload to gracefully handle this.
	// Suspending a Job will reset the StartTime field of the MPIJob.
	//
	// Defaults to false.
	// +kubebuilder:default:=false
	Suspend *bool `json:"suspend,omitempty"`
}
    RunPolicy encapsulates various runtime policies of the distributed training job, for example how to clean up resources and how long the job can stay active.
func (*RunPolicy) DeepCopy ¶
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RunPolicy.
func (*RunPolicy) DeepCopyInto ¶
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type SchedulingPolicy ¶
// SchedulingPolicy encapsulates various scheduling policies of the distributed
// training job, for example `minAvailable` for gang-scheduling. Per the field
// comments below, the supported gang-schedulers are volcano and
// scheduler-plugins.
type SchedulingPolicy struct {
	// MinAvailable defines the minimal number of members to run the PodGroup.
	// If the gang-scheduling isn't empty, input is passed to `.spec.minMember` in PodGroup.
	// Note that, when using this field,
	// you need to make sure the application supports resizing (e.g., Elastic Horovod).
	//
	// If not set, it defaults to the number of workers.
	// +optional
	MinAvailable *int32 `json:"minAvailable,omitempty"`
	// Queue defines the queue name to allocate resource for PodGroup.
	// If the gang-scheduling is set to the volcano,
	// input is passed to `.spec.queue` in PodGroup for the volcano,
	// and if it is set to the scheduler-plugins,
	// input isn't passed to PodGroup.
	// +optional
	Queue string `json:"queue,omitempty"`
	// MinResources defines the minimal resources of members to run the PodGroup.
	// If the gang-scheduling isn't empty,
	// input is passed to `.spec.minResources` in PodGroup for scheduler-plugins.
	// +optional
	MinResources *v1.ResourceList `json:"minResources,omitempty"`
	// PriorityClass defines the PodGroup's PriorityClass.
	// If the gang-scheduling is set to the volcano,
	// input is passed to `.spec.priorityClassName` in PodGroup for volcano,
	// and if it is set to the scheduler-plugins,
	// input isn't passed to PodGroup for scheduler-plugins.
	// +optional
	PriorityClass string `json:"priorityClass,omitempty"`
	// ScheduleTimeoutSeconds defines the maximal time members wait before running the PodGroup.
	// If the gang-scheduling is set to the scheduler-plugins,
	// input is passed to `.spec.scheduleTimeoutSeconds` in PodGroup for the scheduler-plugins,
	// and if it is set to the volcano, input isn't passed to PodGroup.
	// +optional
	ScheduleTimeoutSeconds *int32 `json:"scheduleTimeoutSeconds,omitempty"`
}
    SchedulingPolicy encapsulates various scheduling policies of the distributed training job, for example `minAvailable` for gang-scheduling. Currently, only volcano and scheduler-plugins are supported.
func (*SchedulingPolicy) DeepCopy ¶
func (in *SchedulingPolicy) DeepCopy() *SchedulingPolicy
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulingPolicy.
func (*SchedulingPolicy) DeepCopyInto ¶
func (in *SchedulingPolicy) DeepCopyInto(out *SchedulingPolicy)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.