Documentation
¶
Index ¶
- Constants
- func ToDockerMounts(bindMounts []expconf.BindMount, workDir string) []mount.Mount
- func TrialSpecProxyPorts(taskSpec *TaskSpec, expConfig expconf.ExperimentConfigV0) expconf.ProxyPortsConfig
- func ValidatePbs(pbsOptions []string) []error
- func ValidateSlurm(slurmOptions []string) []error
- type GCCkptSpec
- type GenericCommandSpec
- type GenericTaskSpec
- func (s GenericTaskSpec) ResourcePool() string
- func (s GenericTaskSpec) SetJobPriority(priority int) error
- func (s GenericTaskSpec) SetResourcePool(resourcePool string) error
- func (s GenericTaskSpec) SetWeight(weight float64) error
- func (s GenericTaskSpec) ToTaskSpec() TaskSpec
- func (s GenericTaskSpec) ToV1Job() (*jobv1.Job, error)
- type TaskSpec
- func (t *TaskSpec) Archives() ([]cproto.RunArchive, []cproto.RunArchive)
- func (t *TaskSpec) Clone() (*TaskSpec, error)
- func (t TaskSpec) EnvVars() map[string]string
- func (t *TaskSpec) LogShipperWrappedEntrypoint() []string
- func (t *TaskSpec) ResolveWorkDir()
- func (t *TaskSpec) ToDispatcherManifest(syslog *logrus.Entry, allocationID string, tlsEnabled bool, masterHost string, ...) (*launcher.Manifest, string, string, error)
- func (t *TaskSpec) ToDockerSpec() cproto.Spec
- func (t *TaskSpec) WarnUnsupportedOptions(userConfiguredPriority bool, containerRunType string) string
- type TaskSpecifier
- type TrialSpec
Constants ¶
const (
// SingularityEntrypointWrapperScript is just the name of the singularity entrypoint wrapper.
SingularityEntrypointWrapperScript = "singularity-entrypoint-wrapper.sh"
// StartupHookScript contains the script to run on task startup filled in dynamically.
StartupHookScript = "dynamic-tcd-startup-hook.sh"
)
const (
// DefaultWorkDir is the default workdir.
DefaultWorkDir = "/run/determined/workdir"
RunDir = "/run/determined"
PasswdPath = "/run/determined/etc/passwd"
ShadowPath = "/run/determined/etc/shadow"
GroupPath = "/run/determined/etc/group"
// DtrainSSHPortBase is starting range for Dtrain ports.
DtrainSSHPortBase = 12350
// InterTrainProcessCommPort1Base is starting range for intertraincomm1 ports.
InterTrainProcessCommPort1Base = 12360
// InterTrainProcessCommPort2Base is starting range for intertraincomm2 ports.
InterTrainProcessCommPort2Base = 12365
// C10DPortBase is starting range for c10D ports.
C10DPortBase = 29400
// DTrainSSHPort is the name of a port.
DTrainSSHPort = "DTRAIN_SSH_PORT"
// InterTrainProcessCommPort1 is the name of a port.
InterTrainProcessCommPort1 = "INTER_TRAIN_PROCESS_COMM_PORT_1"
// InterTrainProcessCommPort2 is the name of a port.
InterTrainProcessCommPort2 = "INTER_TRAIN_PROCESS_COMM_PORT_2"
// C10DPort is the name of a port.
C10DPort = "C10D_PORT"
)
File location constants.
const (
// ManifestName is the name used by DAI RM when creating HPC job manifests.
ManifestName = "det"
)
Variables ¶
This section is empty.
Functions ¶
func ToDockerMounts ¶
ToDockerMounts converts expconf bind mounts to container mounts.
func TrialSpecProxyPorts ¶
func TrialSpecProxyPorts( taskSpec *TaskSpec, expConfig expconf.ExperimentConfigV0, ) expconf.ProxyPortsConfig
TrialSpecProxyPorts combines user-defined and system proxy configs. This static function is public because the trial actor builds `TrialSpec` instances late.
func ValidatePbs ¶
ValidatePbs checks that the specified PBS options are allowed. If any are not, messages are returned in an array of errors.
func ValidateSlurm ¶
ValidateSlurm checks that the specified Slurm options are allowed. If any are not, messages are returned in an array of errors.
Types ¶
type GCCkptSpec ¶
type GCCkptSpec struct {
Base TaskSpec
ExperimentID int
LegacyConfig expconf.LegacyConfig
ToDelete string
// If len(CheckpointGlobs) == 0 then we won't delete any checkpoint files
// and just refresh the state of the checkpoint.
CheckpointGlobs []string
DeleteTensorboards bool
}
GCCkptSpec is a description of a task for running checkpoint GC.
func (GCCkptSpec) ToTaskSpec ¶
func (g GCCkptSpec) ToTaskSpec() TaskSpec
ToTaskSpec generates a TaskSpec.
type GenericCommandSpec ¶
type GenericCommandSpec struct {
Base TaskSpec
CommandID string
Config model.CommandConfig
// Deprecated: kept so we can still marshal to this.
// Please use command.CreateGeneric.modelDef instead.
UserFiles archive.Archive
AdditionalFiles archive.Archive
Metadata genericCommandSpecMetadata
Keys *ssh.PrivateAndPublicKeys
WatchProxyIdleTimeout bool
WatchRunnerIdleTimeout bool
TaskType model.TaskType
}
GenericCommandSpec is a description of a task for running a command.
func (*GenericCommandSpec) MakeEnvPorts ¶
func (s *GenericCommandSpec) MakeEnvPorts()
MakeEnvPorts fills in `Environment.Ports`, i.e., the exposed ports for the container config.
func (*GenericCommandSpec) ProxyPorts ¶
func (s *GenericCommandSpec) ProxyPorts() expconf.ProxyPortsConfig
ProxyPorts combines user-defined and system proxy configs.
func (GenericCommandSpec) ToTaskSpec ¶
func (s GenericCommandSpec) ToTaskSpec() TaskSpec
ToTaskSpec generates a TaskSpec.
type GenericTaskSpec ¶
type GenericTaskSpec struct {
Base TaskSpec
ProjectID int
WorkspaceID int
RegisteredTime time.Time
JobID model.JobID
GenericTaskConfig model.GenericTaskConfig
}
GenericTaskSpec is the generic task spec.
func (GenericTaskSpec) ResourcePool ¶
func (s GenericTaskSpec) ResourcePool() string
ResourcePool returns the resource pool.
func (GenericTaskSpec) SetJobPriority ¶
func (s GenericTaskSpec) SetJobPriority(priority int) error
SetJobPriority todo.
func (GenericTaskSpec) SetResourcePool ¶
func (s GenericTaskSpec) SetResourcePool(resourcePool string) error
SetResourcePool todo.
func (GenericTaskSpec) SetWeight ¶
func (s GenericTaskSpec) SetWeight(weight float64) error
SetWeight todo.
func (GenericTaskSpec) ToTaskSpec ¶
func (s GenericTaskSpec) ToTaskSpec() TaskSpec
ToTaskSpec converts the generic task spec to the common task spec.
type TaskSpec ¶
type TaskSpec struct {
// Fields that are only for task logic.
Description string
// LoggingFields are fields to include in each record of structured logging.
LoggingFields map[string]string
// LogRetentionDays is the number of days to retain logs for.
LogRetentionDays *int16
// Fields that are set on the cluster level.
ClusterID string
HarnessPath string
MasterCert []byte
SSHRsaSize int
SegmentEnabled bool
SegmentAPIKey string
// Fields that are set on a per-request basis.
// TaskContainerDefaults should be removed from TaskSpec once we move to using the same
// schema for the cluster-level defaults and the request-level configuration.
TaskContainerDefaults model.TaskContainerDefaultsConfig
Environment expconf.EnvironmentConfig
ResourcesConfig expconf.ResourcesConfig
WorkDir string
Owner *model.User
AgentUserGroup *model.AgentUserGroup
ExtraArchives []cproto.RunArchive
ExtraEnvVars map[string]string
ExtraPodLabels map[string]string
Entrypoint []string
Mounts []mount.Mount
// UseHostMode is whether host mode networking would be desirable for this task.
// This is used by Docker only.
UseHostMode bool
ShmSize int64
// The parent task of an allocation.
TaskID string
// Fields that are set on a per-resource basis.
AllocationID string
AllocationSessionToken string
ResourcesID string
ContainerID string
Devices []device.Device
UserSessionToken string
TaskType model.TaskType
SlurmConfig expconf.SlurmConfig
PbsConfig expconf.PbsConfig
ExtraProxyPorts expconf.ProxyPortsConfig
Workspace string
Project string
Labels []string
// Ports required by trial or commands and their respective base port values.
UniqueExposedPortRequests map[string]int
// For testing only.
DontShipLogs bool
}
TaskSpec defines the spec of a task.
func (*TaskSpec) Archives ¶
func (t *TaskSpec) Archives() ([]cproto.RunArchive, []cproto.RunArchive)
Archives returns all the archives.
func (*TaskSpec) LogShipperWrappedEntrypoint ¶
LogShipperWrappedEntrypoint returns the configured Entrypoint wrapped with ship_logs.py.
func (*TaskSpec) ResolveWorkDir ¶
func (t *TaskSpec) ResolveWorkDir()
ResolveWorkDir resolves the work dir.
func (*TaskSpec) ToDispatcherManifest ¶
func (t *TaskSpec) ToDispatcherManifest( syslog *logrus.Entry, allocationID string, tlsEnabled bool, masterHost string, masterPort int, certificateName string, numSlots int, slotType device.Type, slurmPartition string, tresSupported bool, gresSupported bool, containerRunType string, isPbsLauncher bool, labelMode *string, disabledNodes []string, ) (*launcher.Manifest, string, string, error)
ToDispatcherManifest creates the manifest that will be ultimately sent to the launcher. Returns:
Manifest, launchingUserName, PayloadName, err
Note: Cannot pass "req *sproto.AllocateRequest" as an argument, as it requires import of "github.com/determined-ai/determined/master/internal/sproto", which results in an "import cycle not allowed" error.
func (*TaskSpec) ToDockerSpec ¶
ToDockerSpec converts a task spec to a docker container spec.
type TaskSpecifier ¶
type TaskSpecifier interface {
ToTaskSpec() TaskSpec
}
TaskSpecifier creates a TaskSpec. ToTaskSpec must only be called once per specifier.
type TrialSpec ¶
type TrialSpec struct {
Base TaskSpec
ExperimentID int
TrialID int
TrialRunID int
ExperimentConfig expconf.ExperimentConfig
HParams map[string]interface{}
TrialSeed uint32
LatestCheckpoint *model.Checkpoint
StepsCompleted int
Keys ssh.PrivateAndPublicKeys
}
TrialSpec is a description of a task for running a trial container.
func (*TrialSpec) MakeEnvPorts ¶
func (s *TrialSpec) MakeEnvPorts() expconf.EnvironmentConfigV0
MakeEnvPorts fills in `Environment.Ports`, i.e., the exposed ports for the container config.
func (*TrialSpec) ProxyPorts ¶
func (s *TrialSpec) ProxyPorts() expconf.ProxyPortsConfig
ProxyPorts combines user-defined and system proxy configs.
func (TrialSpec) ToTaskSpec ¶
ToTaskSpec generates a TaskSpec.