Documentation
¶
Index ¶
- func AddAllocationAcceleratorData(ctx context.Context, accData model.AcceleratorData) error
- type AllocationExited
- type AllocationService
- type AllocationSignal
- type AllocationState
- type ErrAllocationUnfulfilled
- type ErrAlreadyCancelled
- type ErrBehaviorDisabled
- type ErrBehaviorUnsupported
- type ErrNoAllocation
- type ErrStaleContainer
- type ErrStaleResources
- type ErrStaleResourcesReceived
- type ErrTimeoutExceeded
- type RendezvousInfoOrError
- type RendezvousWatcher
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func AddAllocationAcceleratorData ¶
func AddAllocationAcceleratorData(ctx context.Context, accData model.AcceleratorData) error
AddAllocationAcceleratorData stores accelerator data for an allocation.
Types ¶
type AllocationExited ¶
type AllocationExited struct {
// UserRequestedStop indicates that a container unexpectedly exited with 0.
UserRequestedStop bool
Err error
FinalState AllocationState
}
AllocationExited summarizes the exit status of an allocation.
func (*AllocationExited) String ¶
func (a *AllocationExited) String() string
type AllocationService ¶
type AllocationService interface {
GetAllAllocationIDs() []model.AllocationID
StartAllocation(
logCtx logger.Context,
req sproto.AllocateRequest,
db db.DB,
rm rm.ResourceManager,
specifier tasks.TaskSpecifier,
onExit func(*AllocationExited),
) error
AwaitTermination(id model.AllocationID)
Signal(
id model.AllocationID,
sig AllocationSignal,
reason string,
) error
State(id model.AllocationID) (AllocationState, error)
SetReady(ctx context.Context, id model.AllocationID) error
SetWaiting(ctx context.Context, id model.AllocationID) error
SetProxyAddress(
ctx context.Context,
id model.AllocationID,
addr string,
) error
GetAllocation(
ctx context.Context,
allocallocationID string,
) (*model.Allocation, error)
SetAcceleratorData(
ctx context.Context,
accData model.AcceleratorData,
) error
WatchRendezvous(
ctx context.Context,
id model.AllocationID,
rID sproto.ResourcesID,
) (*trialv1.RendezvousInfo, error)
SetResourcesAsDaemon(
ctx context.Context,
id model.AllocationID,
rID sproto.ResourcesID,
) error
AllGather(
ctx context.Context,
allocationID model.AllocationID,
id uuid.UUID,
numPeers int,
data any,
) ([]any, error)
WatchPreemption(ctx context.Context, id model.AllocationID) (bool, error)
AckPreemption(ctx context.Context, id model.AllocationID) error
SendLog(
ctx context.Context,
id model.AllocationID,
log *sproto.ContainerLog,
)
}
AllocationService allows callers to launch, direct and query allocations.
var DefaultService AllocationService = newAllocationService()
DefaultService is the singleton default allocationService.
type AllocationSignal ¶
type AllocationSignal string
AllocationSignal is the type of the signals that can be sent to an allocation.
const (
// KillAllocation is the signal to kill an allocation; analogous to SIGKILL.
KillAllocation AllocationSignal = "kill"
// TerminateAllocation is the signal to terminate an allocation; analogous to SIGTERM.
TerminateAllocation AllocationSignal = "terminate"
)
type AllocationState ¶
type AllocationState struct {
State model.AllocationState
Resources map[sproto.ResourcesID]sproto.ResourcesSummary
Ready bool
Addresses map[sproto.ResourcesID][]cproto.Address
Containers map[sproto.ResourcesID][]cproto.Container
}
AllocationState requests allocation state. A copy is filled and returned.
func (AllocationState) SingleContainer ¶
func (a AllocationState) SingleContainer() *cproto.Container
SingleContainer returns a single random container from the allocation state.
func (AllocationState) SingleContainerAddresses ¶
func (a AllocationState) SingleContainerAddresses() []cproto.Address
SingleContainerAddresses returns a single random container's addresses from the allocation state.
type ErrAllocationUnfulfilled ¶
type ErrAllocationUnfulfilled struct {
Action string
}
ErrAllocationUnfulfilled is returned when an operation is tried without an active allocation.
func (ErrAllocationUnfulfilled) Error ¶
func (e ErrAllocationUnfulfilled) Error() string
type ErrAlreadyCancelled ¶
type ErrAlreadyCancelled struct{}
ErrAlreadyCancelled is returned to the allocation when it tries to take an action but has an unread cancellation in its inbox.
func (ErrAlreadyCancelled) Error ¶
func (e ErrAlreadyCancelled) Error() string
type ErrBehaviorDisabled ¶
type ErrBehaviorDisabled struct {
Behavior string
}
ErrBehaviorDisabled is returned when an operation is tried without the behavior being enabled.
func (ErrBehaviorDisabled) Error ¶
func (e ErrBehaviorDisabled) Error() string
type ErrBehaviorUnsupported ¶
type ErrBehaviorUnsupported struct {
Behavior string
}
ErrBehaviorUnsupported is returned when an operation is tried without the behavior being supported.
func (ErrBehaviorUnsupported) Error ¶
func (e ErrBehaviorUnsupported) Error() string
type ErrNoAllocation ¶
type ErrNoAllocation struct {
Action string
}
ErrNoAllocation is returned when an operation is tried without a requested allocation.
func (ErrNoAllocation) Error ¶
func (e ErrNoAllocation) Error() string
type ErrStaleContainer ¶
ErrStaleContainer is returned when an operation was attempted by a stale container.
func (ErrStaleContainer) Error ¶
func (e ErrStaleContainer) Error() string
type ErrStaleResources ¶
type ErrStaleResources struct {
ID sproto.ResourcesID
}
ErrStaleResources is returned when an operation was attempted by stale resources.
func (ErrStaleResources) Error ¶
func (e ErrStaleResources) Error() string
type ErrStaleResourcesReceived ¶
type ErrStaleResourcesReceived struct{}
ErrStaleResourcesReceived is returned when the scheduler gives an allocation resources between when it requests them and when it decides, for some reason or another, that they are not needed.
func (ErrStaleResourcesReceived) Error ¶
func (e ErrStaleResourcesReceived) Error() string
type ErrTimeoutExceeded ¶
type ErrTimeoutExceeded struct {
Message string
}
ErrTimeoutExceeded is returned, with a bit of detail, when a timeout is exceeded.
func (ErrTimeoutExceeded) Error ¶
func (e ErrTimeoutExceeded) Error() string
type RendezvousInfoOrError ¶
type RendezvousInfoOrError struct {
Info *trialv1.RendezvousInfo
Err error
}
RendezvousInfoOrError contains either rendezvous info or an error from failing to materialize it.
type RendezvousWatcher ¶
type RendezvousWatcher struct {
C <-chan RendezvousInfoOrError
}
RendezvousWatcher contains a channel which can be polled for rendezvous info.