Documentation ¶
Overview ¶
Package gpuallocator handles GPU allocation.
Index ¶
- Constants
- Variables
- func IsScalingQuotaExceededError(err error) bool
- func RefreshGPUNodeCapacity(ctx context.Context, k8sClient client.Client, node *tfv1.GPUNode, ...) ([]string, error)
- type CompactFirst
- type GpuAllocator
- func (s *GpuAllocator) AdjustAllocation(ctx context.Context, adjustRequest tfv1.AdjustRequest, dryRun bool) (tfv1.Resource, error)
- func (s *GpuAllocator) Alloc(req *tfv1.AllocRequest) ([]*tfv1.GPU, error)
- func (s *GpuAllocator) Bind(gpuNames []string, req *tfv1.AllocRequest) ([]*tfv1.GPU, error)
- func (s *GpuAllocator) CheckQuotaAndFilter(ctx context.Context, req *tfv1.AllocRequest) ([]tfv1.GPU, error)
- func (s *GpuAllocator) ComposeAllocationRequest(pod *v1.Pod) (tfv1.AllocRequest, string, error)
- func (s *GpuAllocator) Dealloc(workloadNameNamespace tfv1.NameNamespace, gpus []string, ...)
- func (s *GpuAllocator) Filter(req *tfv1.AllocRequest, toFilterGPUs []tfv1.GPU) ([]tfv1.GPU, error)
- func (s *GpuAllocator) GetQuotaStore() *quota.QuotaStore
- func (s *GpuAllocator) InitGPUAndQuotaStore() error
- func (s *GpuAllocator) ReconcileAllocationState()
- func (s *GpuAllocator) Score(ctx context.Context, cfg *config.GPUFitConfig, req tfv1.AllocRequest, ...) map[string]map[string]int
- func (s *GpuAllocator) Select(req *tfv1.AllocRequest, filteredGPUs []tfv1.GPU) ([]*tfv1.GPU, error)
- func (s *GpuAllocator) SetMaxWorkerPerNode(maxWorkerPerNode int)
- func (s *GpuAllocator) SetupWithManager(ctx context.Context, mgr manager.Manager) (<-chan struct{}, error)
- func (s *GpuAllocator) StartInformerForGPU(ctx context.Context, mgr manager.Manager) error
- func (s *GpuAllocator) Stop()
- func (s *GpuAllocator) SyncGPUsToK8s()
- type LowLoadFirst
- type Strategy
Constants ¶
const MaxGPUCounterPerAllocation = 128
Variables ¶
var ScalingQuotaExceededError = goerrors.New("scaling quota exceeded")
Functions ¶
func IsScalingQuotaExceededError ¶ added in v1.35.0
func IsScalingQuotaExceededError(err error) bool
IsScalingQuotaExceededError reports whether err is a ScalingQuotaExceededError.
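A minimal usage fragment (not from the package's own examples), assuming err came from an allocation or adjustment call in this package:

if err != nil && gpuallocator.IsScalingQuotaExceededError(err) {
	// The failure is specifically a quota rejection, not a lack of
	// capacity or a malformed request; callers can react by shrinking
	// the request instead of failing outright.
}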
Types ¶
type CompactFirst ¶
type CompactFirst struct {
// contains filtered or unexported fields
}
CompactFirst selects the GPU with the minimum available resources (the most utilized) to pack workloads efficiently and maximize GPU utilization.
func (CompactFirst) Score ¶ added in v1.35.0
func (c CompactFirst) Score(gpu tfv1.GPU) int
Score is used by the Kubernetes scheduler framework.
func (CompactFirst) SelectGPUs ¶
SelectGPUs selects multiple GPUs from the same node with the least available resources (most packed)
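A hedged sketch of consuming a strategy's Score method. The local interface below only assumes the Score(tfv1.GPU) int method documented for CompactFirst and LowLoadFirst, plus the usual Kubernetes scheduler convention that a higher score means a more preferred GPU; the import path for tfv1 is a placeholder.

import (
	"math"

	tfv1 "example.com/your-project/api/v1" // placeholder import path; use your module's tfv1 package
)

// pickByScore returns the candidate GPU with the highest score under the
// given scorer (for example CompactFirst or LowLoadFirst), or nil if none.
func pickByScore(scorer interface{ Score(tfv1.GPU) int }, gpus []tfv1.GPU) *tfv1.GPU {
	var best *tfv1.GPU
	bestScore := math.MinInt
	for i := range gpus {
		if s := scorer.Score(gpus[i]); s > bestScore {
			bestScore, best = s, &gpus[i]
		}
	}
	return best
}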
type GpuAllocator ¶
func NewGpuAllocator ¶
func (*GpuAllocator) AdjustAllocation ¶ added in v1.35.0
func (s *GpuAllocator) AdjustAllocation(ctx context.Context, adjustRequest tfv1.AdjustRequest, dryRun bool) (tfv1.Resource, error)
AdjustAllocation is used for scaling decisions. For scale-down, the autoscaler can call AdjustAllocation directly. For scale-up, it must first call AdjustAllocation with dryRun=true to pre-check capacity and determine whether the allocation would be valid; the call returns an error along with the maximum new requests/limits available on the existing GPU. If the returned error is ScalingQuotaExceededError, the requested allocation is invalid and the autoscaler should retry with another AdjustRequest that stays within quota, built from the maximum returned by the first call, until AdjustAllocation returns a nil error, at most the pre-configured maxRetry times.
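A sketch of the scale-up flow described above. The shrinkToFit helper is hypothetical (how an AdjustRequest is rebuilt from the returned tfv1.Resource is project-specific), the follow-up non-dry-run call that applies the adjustment is an assumption about how the result is consumed, maxRetry comes from configuration, and the import paths are placeholders.

import (
	"context"
	"fmt"

	tfv1 "example.com/your-project/api/v1"           // placeholder import path
	"example.com/your-project/internal/gpuallocator" // placeholder import path
)

// scaleUpWithRetry pre-checks capacity with dryRun=true and shrinks the
// request when the quota would be exceeded, retrying at most maxRetry times.
func scaleUpWithRetry(ctx context.Context, s *gpuallocator.GpuAllocator, req tfv1.AdjustRequest, maxRetry int) error {
	for i := 0; i < maxRetry; i++ {
		maxAllowed, err := s.AdjustAllocation(ctx, req, true) // dry run
		if err == nil {
			_, err = s.AdjustAllocation(ctx, req, false) // apply for real (assumed usage)
			return err
		}
		if !gpuallocator.IsScalingQuotaExceededError(err) {
			return err // not a quota problem; do not retry
		}
		// Quota exceeded: rebuild the request from the max requests/limits
		// reported by the dry run and try again.
		req = shrinkToFit(req, maxAllowed) // hypothetical helper
	}
	return fmt.Errorf("scale-up still exceeds quota after %d retries", maxRetry)
}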
func (*GpuAllocator) Alloc ¶
func (s *GpuAllocator) Alloc(req *tfv1.AllocRequest) ([]*tfv1.GPU, error)
Alloc allocates a request to a gpu or multiple gpus from the same node. This is now implemented as a combination of Filter and Bind for backward compatibility.
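A fragment showing the one-step path, assuming s is the allocator, pod is a pending *v1.Pod in scope, and the request is built via ComposeAllocationRequest (its second return value is ignored here because its meaning is not documented in this snippet):

allocReq, _, err := s.ComposeAllocationRequest(pod) // second return value ignored in this sketch
if err != nil {
	return err
}
gpus, err := s.Alloc(&allocReq)
if err != nil {
	return err
}
// gpus are *tfv1.GPU entries, all chosen from the same node.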
func (*GpuAllocator) Bind ¶ added in v1.35.0
func (s *GpuAllocator) Bind(gpuNames []string, req *tfv1.AllocRequest) ([]*tfv1.GPU, error)
Bind allocates resources on the provided GPUs for the given request. It updates the in-memory store and marks the GPUs as dirty for syncing.
func (*GpuAllocator) CheckQuotaAndFilter ¶ added in v1.35.0
func (s *GpuAllocator) CheckQuotaAndFilter(ctx context.Context, req *tfv1.AllocRequest) ([]tfv1.GPU, error)
func (*GpuAllocator) ComposeAllocationRequest ¶ added in v1.35.0
func (s *GpuAllocator) ComposeAllocationRequest(pod *v1.Pod) (tfv1.AllocRequest, string, error)
func (*GpuAllocator) Dealloc ¶
func (s *GpuAllocator) Dealloc(workloadNameNamespace tfv1.NameNamespace, gpus []string, podMeta metav1.ObjectMeta)
Dealloc removes a request from its GPUs, releasing the resources it held so they become available again.
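A fragment releasing a pod's GPUs. It assumes tfv1.NameNamespace carries plain Name/Namespace fields (an assumption), workloadName is a hypothetical variable holding the owning workload's name, and gpuNames is the same list of GPU names used at Bind time:

s.Dealloc(
	tfv1.NameNamespace{Namespace: pod.Namespace, Name: workloadName}, // field names assumed
	gpuNames,       // GPU names previously bound for this pod
	pod.ObjectMeta, // pod metadata identifying the allocation
)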
func (*GpuAllocator) Filter ¶ added in v1.35.0
func (s *GpuAllocator) Filter(req *tfv1.AllocRequest, toFilterGPUs []tfv1.GPU) ([]tfv1.GPU, error)
Filter applies filters to a pool of GPUs based on the provided request and returns selected GPUs. It does not modify the GPU resources, only filters and selects them.
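A sketch of the decomposed pipeline that Alloc wraps: quota check and filtering, then selection, then binding. It assumes CheckQuotaAndFilter produces the quota-checked candidate pool from the allocator's store, that tfv1.GPU embeds ObjectMeta (so .Name is available), and that Bind expects the names of the GPUs returned by Select; import paths are placeholders.

import (
	"context"

	tfv1 "example.com/your-project/api/v1"           // placeholder import path
	"example.com/your-project/internal/gpuallocator" // placeholder import path
)

// allocateStepwise runs the filter/select/bind steps that Alloc combines.
func allocateStepwise(ctx context.Context, s *gpuallocator.GpuAllocator, req *tfv1.AllocRequest) ([]*tfv1.GPU, error) {
	// 1. Enforce quota and narrow the candidate pool (read-only).
	candidates, err := s.CheckQuotaAndFilter(ctx, req)
	if err != nil {
		return nil, err
	}
	// 2. Pick concrete GPUs (all from one node) among the candidates.
	selected, err := s.Select(req, candidates)
	if err != nil {
		return nil, err
	}
	// 3. Bind: update the in-memory store and mark the GPUs dirty for syncing.
	names := make([]string, 0, len(selected))
	for _, gpu := range selected {
		names = append(names, gpu.Name) // assumes GPU embeds ObjectMeta
	}
	return s.Bind(names, req)
}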
func (*GpuAllocator) GetQuotaStore ¶ added in v1.35.0
func (s *GpuAllocator) GetQuotaStore() *quota.QuotaStore
func (*GpuAllocator) InitGPUAndQuotaStore ¶ added in v1.35.0
func (s *GpuAllocator) InitGPUAndQuotaStore() error
InitGPUAndQuotaStore initializes both GPU store and quota store from Kubernetes
func (*GpuAllocator) ReconcileAllocationState ¶ added in v1.35.0
func (s *GpuAllocator) ReconcileAllocationState()
When running as the leader, ReconcileAllocationState reconciles allocation state based on existing workers. This function runs while holding the storeMutex lock.
func (*GpuAllocator) Score ¶ added in v1.35.0
func (s *GpuAllocator) Score(ctx context.Context, cfg *config.GPUFitConfig, req tfv1.AllocRequest, validNodeGPUs map[string][]tfv1.GPU) map[string]map[string]int
The first map level is keyed by Kubernetes node name, the second by GPU name; the value is the score.
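A fragment reading the nested result, assuming cfg (*config.GPUFitConfig), allocReq, and validNodeGPUs are already in scope and fmt is imported:

scores := s.Score(ctx, cfg, allocReq, validNodeGPUs)
for nodeName, perGPU := range scores {
	for gpuName, score := range perGPU {
		fmt.Printf("node %s, gpu %s -> score %d\n", nodeName, gpuName, score)
	}
}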
func (*GpuAllocator) Select ¶ added in v1.35.0
func (s *GpuAllocator) Select(req *tfv1.AllocRequest, filteredGPUs []tfv1.GPU) ([]*tfv1.GPU, error)
func (*GpuAllocator) SetMaxWorkerPerNode ¶ added in v1.35.0
func (s *GpuAllocator) SetMaxWorkerPerNode(maxWorkerPerNode int)
SetMaxWorkerPerNode sets the maximum number of workers allowed per node.
func (*GpuAllocator) SetupWithManager ¶
func (s *GpuAllocator) SetupWithManager(ctx context.Context, mgr manager.Manager) (<-chan struct{}, error)
SetupWithManager sets up the GpuAllocator with the Manager.
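A sketch of wiring the allocator into a controller-runtime manager. The returned channel is treated here as a readiness signal for the in-memory store, which is an assumption; construction via NewGpuAllocator is elided and the allocator import path is a placeholder.

import (
	"context"

	"sigs.k8s.io/controller-runtime/pkg/manager"

	"example.com/your-project/internal/gpuallocator" // placeholder import path
)

// registerAllocator hooks the allocator into the manager and blocks until
// the allocator signals readiness on the returned channel (assumed meaning).
func registerAllocator(ctx context.Context, mgr manager.Manager, s *gpuallocator.GpuAllocator) error {
	readyCh, err := s.SetupWithManager(ctx, mgr)
	if err != nil {
		return err
	}
	<-readyCh // assumed: closed once the GPU and quota stores are populated
	return nil
}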
func (*GpuAllocator) StartInformerForGPU ¶ added in v1.35.0
func (s *GpuAllocator) StartInformerForGPU(ctx context.Context, mgr manager.Manager) error
func (*GpuAllocator) SyncGPUsToK8s ¶ added in v1.35.0
func (s *GpuAllocator) SyncGPUsToK8s()
SyncGPUsToK8s syncs GPU status to Kubernetes
type LowLoadFirst ¶
type LowLoadFirst struct {
// contains filtered or unexported fields
}
LowLoadFirst selects the GPU with the maximum available resources (the least utilized) to distribute workloads more evenly across GPUs.
func (LowLoadFirst) Score ¶ added in v1.35.0
func (l LowLoadFirst) Score(gpu tfv1.GPU) int
Score is used by the Kubernetes scheduler framework.
func (LowLoadFirst) SelectGPUs ¶
SelectGPUs selects multiple GPUs from the same node with the most available resources (least loaded)