Documentation
¶
Index ¶
Constants ¶
View Source
const ( // PaddleJobDefaultPortName is name of the port used to communicate between Master and // workers. PaddleJobDefaultPortName = "master" // PaddleJobDefaultContainerName is the name of the PaddleJob container. PaddleJobDefaultContainerName = "paddle" // PaddleJobDefaultPort is default value of the port. PaddleJobDefaultPort = 36543 // PaddleJobDefaultRestartPolicy is default RestartPolicy for PaddleReplicaSpec. PaddleJobDefaultRestartPolicy = commonv1.RestartPolicyOnFailure // PaddleJobKind is the kind name. PaddleJobKind = "PaddleJob" // PaddleJobPlural is the PaddlePlural for paddleJob. PaddleJobPlural = "paddlejobs" // PaddleJobSingular is the singular for paddleJob. PaddleJobSingular = "paddlejob" // PaddleJobFrameworkName is the name of the ML Framework PaddleJobFrameworkName = "paddle" // PaddleJobReplicaTypeMaster is the type of Master of distributed Paddle PaddleJobReplicaTypeMaster commonv1.ReplicaType = "Master" // PaddleJobReplicaTypeWorker is the type for workers of distributed Paddle. PaddleJobReplicaTypeWorker commonv1.ReplicaType = "Worker" )
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type PaddleElasticPolicy ¶
type PaddleElasticPolicy struct {
// minReplicas is the lower limit for the number of replicas to which the training job
// can scale down. It defaults to null.
// +optional
MinReplicas *int32 `json:"minReplicas,omitempty"`
// upper limit for the number of pods that can be set by the autoscaler; cannot be smaller than MinReplicas, defaults to null.
// +optional
MaxReplicas *int32 `json:"maxReplicas,omitempty"`
// MaxRestarts is the limit for restart times of pods in elastic mode.
// +optional
MaxRestarts *int32 `json:"maxRestarts,omitempty"`
// Metrics contains the specifications which are used to calculate the
// desired replica count (the maximum replica count across all metrics will
// be used). The desired replica count is calculated with multiplying the
// ratio between the target value and the current value by the current
// number of pods. Ergo, metrics used must decrease as the pod count is
// increased, and vice-versa. See the individual metric source types for
// more information about how each type of metric must respond.
// If not set, the HPA will not be created.
// +optional
Metrics []autoscalingv2.MetricSpec `json:"metrics,omitempty"`
}
type PaddleJob ¶
type PaddleJob struct {
// Standard Kubernetes type metadata.
metav1.TypeMeta `json:",inline"`
metav1.ObjectMeta `json:"metadata,omitempty"`
// Specification of the desired state of the PaddleJob.
Spec PaddleJobSpec `json:"spec,omitempty"`
// Most recently observed status of the PaddleJob.
// Read-only (modified by the system).
Status commonv1.JobStatus `json:"status,omitempty"`
}
PaddleJob Represents a PaddleJob resource.
type PaddleJobList ¶
type PaddleJobList struct {
// Standard type metadata.
metav1.TypeMeta `json:",inline"`
// Standard list metadata.
metav1.ListMeta `json:"metadata,omitempty"`
// List of PaddleJobs.
Items []PaddleJob `json:"items"`
}
PaddleJobList is a list of PaddleJobs.
type PaddleJobSpec ¶
type PaddleJobSpec struct {
// RunPolicy encapsulates various runtime policies of the distributed training
// job, for example how to clean up resources and how long the job can stay
// active.
//+kubebuilder:validation:Optional
RunPolicy commonv1.RunPolicy `json:"runPolicy"`
// ElasticPolicy holds the elastic policy for paddle job.
ElasticPolicy *PaddleElasticPolicy `json:"elasticPolicy,omitempty"`
// A map of PaddleReplicaType (type) to ReplicaSpec (value). Specifies the Paddle cluster configuration.
// For example,
// {
// "Master": PaddleReplicaSpec,
// "Worker": PaddleReplicaSpec,
// }
PaddleReplicaSpecs map[commonv1.ReplicaType]*commonv1.ReplicaSpec `json:"paddleReplicaSpecs"`
}
PaddleJobSpec is a desired state description of the PaddleJob.
Click to show internal directories.
Click to hide internal directories.