Documentation
¶
Index ¶
- Constants
- func CheckDeviceType(annotations map[string]string, deviceType string) bool
- func CheckDeviceUuid(annotations map[string]string, deviceUUID string) bool
- func FilterAllocatingPods(activePods []corev1.Pod) []corev1.Pod
- func GetAllocatableOfNode(node *corev1.Node, resourceName string) int
- func GetCapacityOfNode(node *corev1.Node, resourceName string) int
- func GetContainerRuntime(pod *corev1.Pod, containerName string) (runtimeName string, containerId string)
- func GetContainerStatus(pod *corev1.Pod, containerName string) (*corev1.ContainerStatus, bool)
- func GetCurrentPodByAllocatingPods(allocatingPods []corev1.Pod) (*corev1.Pod, error)
- func GetK8sPodCGroupFullPath(podCGroupPath string) string
- func GetK8sPodCGroupPath(pod *corev1.Pod) (string, error)
- func GetK8sPodContainerCGroupFullPath(pod *corev1.Pod, containerName string, getFullPath func(string) string) (string, error)
- func GetK8sPodDeviceCGroupFullPath(podCGroupPath string) string
- func GetNumaInformation(idx int) (int, error)
- func GetPredicateTimeOfPod(pod corev1.Pod) uint64
- func GetResourceOfContainer(container *corev1.Container, resourceName corev1.ResourceName) int
- func GetResourceOfPod(pod *corev1.Pod, resourceName corev1.ResourceName) int
- func HasAnnotation(obj metav1.Object, anno string) (val string, ok bool)
- func HasLabel(obj metav1.Object, label string) (val string, ok bool)
- func InitializeCGroupDriver(cgroupDriver string)
- func InsertAnnotation(obj metav1.Object, k, v string)
- func IsShouldDeletePod(pod *corev1.Pod) bool
- func IsVGPUEnabledNode(node *corev1.Node) bool
- func IsVGPURequiredContainer(c *corev1.Container) bool
- func IsVGPUResourcePod(pod *corev1.Pod) bool
- func ParseContainerRuntime(podContainerId string) (runtimeName string, containerId string)
- func PathIsNotExist(fullPath string) bool
- func PodIsTerminated(pod *corev1.Pod) bool
- func ShouldRetry(err error) bool
- func SplitK8sCGroupBasePath(cgroupFullPath string) string
- func SystemdPathPrefixOfRuntime(runtimeName string) string
- type AssignedPhase
- type CGroupDriver
- type CgroupName
- type ComputePolicy
- type PodsOrderedByPredicateTime
- type SchedulerPolicy
- type TopologyMode
Constants ¶
const ( KubeletConfigPath = "/var/lib/kubelet/config.yaml" CGroupBasePath = "/sys/fs/cgroup" CGroupDevicePath = CGroupBasePath + "/devices" SYSTEMD CGroupDriver = "systemd" CGROUPFS CGroupDriver = "cgroupfs" )
const ( DomainPrefix = "nvidia.com" // VGPUComputePolicyAnnotation none / balance / fixed(default) VGPUComputePolicyAnnotation = DomainPrefix + "/vgpu-compute-policy" NodeNvidiaDriverVersionLabel = DomainPrefix + "/node-driver-version" NodeNvidiaCudaVersionLabel = DomainPrefix + "/node-cuda-version" VGPUNumberResourceName = DomainPrefix + "/vgpu-number" VGPUMemoryResourceName = DomainPrefix + "/vgpu-memory" VGPUCoreResourceName = DomainPrefix + "/vgpu-cores" // NodeDeviceHeartbeatAnnotation Node device heartbeat time NodeDeviceHeartbeatAnnotation = DomainPrefix + "/node-device-heartbeat" NodeDeviceRegisterAnnotation = DomainPrefix + "/node-device-register" NodeDeviceTopologyAnnotation = DomainPrefix + "/node-device-topology" DeviceMemoryFactorAnnotation = DomainPrefix + "/device-memory-factor" // PodIncludeGpuTypeAnnotation Specify the GPU type to be used PodIncludeGpuTypeAnnotation = DomainPrefix + "/include-gpu-type" // PodExcludeGpuTypeAnnotation Specify the GPU type to exclude PodExcludeGpuTypeAnnotation = DomainPrefix + "/exclude-gpu-type" // Scheduling strategies at the node and device levels NodeSchedulerPolicyAnnotation = DomainPrefix + "/node-scheduler-policy" DeviceSchedulerPolicyAnnotation = DomainPrefix + "/device-scheduler-policy" // DeviceTopologyModeAnnotation Specify device topology mode DeviceTopologyModeAnnotation = DomainPrefix + "/device-topology-mode" // PodIncludeGPUUUIDAnnotation Specify the GPU UUID to be used PodIncludeGPUUUIDAnnotation = DomainPrefix + "/include-gpu-uuid" // PodExcludeGPUUUIDAnnotation Specify the GPU UUID to be excluded PodExcludeGPUUUIDAnnotation = DomainPrefix + "/exclude-gpu-uuid" PodPredicateNodeAnnotation = DomainPrefix + "/predicate-node" PodPredicateTimeAnnotation = DomainPrefix + "/predicate-time" PodAssignedPhaseLabel = DomainPrefix + "/assigned-phase" // PodVGPUPreAllocAnnotation Pre allocated device information by the scheduler PodVGPUPreAllocAnnotation = DomainPrefix + "/pre-allocated" // PodVGPURealAllocAnnotation Real device information allocated by device plugins PodVGPURealAllocAnnotation = DomainPrefix + "/real-allocated" HundredCore = 100 MaxDeviceNumber = 16 // MaxContainerLimit max container num MaxContainerLimit = 300000 // PodAnnotationMaxLength pod annotation max data length 1MB PodAnnotationMaxLength = 1024 * 1024 AllocateCheckErrMsg = "Allocate check failed" PreStartContainerCheckErrMsg = "PreStartContainer check failed" PreStartContainerCheckErrType = "PreStartContainerCheckErr" )
const ( // CUDA_MEM_LIMIT_<index> gpu memory limit CudaMemoryLimitEnv = "CUDA_MEM_LIMIT" // CUDA_MEM_RATIO_<index> gpu memory ratio CudaMemoryRatioEnv = "CUDA_MEM_RATIO" // CUDA_CORE_LIMIT_<index> gpu core limit CudaCoreLimitEnv = "CUDA_CORE_LIMIT" // CUDA_CORE_SOFT_LIMIT_<index> gpu core soft limit CudaSoftCoreLimitEnv = "CUDA_CORE_SOFT_LIMIT" // CUDA_MEM_OVERSOLD_<index> gpu memory oversold switch CudaMemoryOversoldEnv = "CUDA_MEM_OVERSOLD" // GPU_DEVICES_UUID gpu uuid list GPUDevicesUuidEnv = "GPU_DEVICES_UUID" // CompatibilityModeEnv Indicate the compatibility mode of the environment CompatibilityModeEnv = "ENV_COMPATIBILITY_MODE" PodNameEnv = "VGPU_POD_NAME" PodNamespaceEnv = "VGPU_POD_NAMESPACE" PodUIDEnv = "VGPU_POD_UID" ContNameEnv = "VGPU_CONTAINER_NAME" )
const ( DeviceListStrategyEnvvar = "envvar" DeviceListStrategyVolumeMounts = "volume-mounts" DeviceListStrategyCDIAnnotations = "cdi-annotations" DeviceListStrategyCDICRI = "cdi-cri" )
Constants to represent the various device list strategies
Variables ¶
This section is empty.
Functions ¶
func CheckDeviceType ¶
CheckDeviceType Check if the device type meets expectations.
func CheckDeviceUuid ¶
CheckDeviceUuid Check if the device uuid meets expectations.
func FilterAllocatingPods ¶
FilterAllocatingPods filter out the list of pods to be allocated.
func GetAllocatableOfNode ¶
GetAllocatableOfNode Return the number of resources that can be allocated to the node.
func GetCapacityOfNode ¶
GetCapacityOfNode Return the capacity of node resources.
func GetContainerRuntime ¶
func GetContainerStatus ¶
func GetCurrentPodByAllocatingPods ¶
GetCurrentPodByAllocatingPods finds the oldest Pod among the allocating Pods and returns it as the current Pod to be allocated.
func GetK8sPodCGroupFullPath ¶
GetK8sPodCGroupFullPath Obtain the cgroupv2 full path of the pod.
func GetK8sPodCGroupPath ¶
GetK8sPodCGroupPath Obtain the relative path of pod cgroup for k8s.
func GetK8sPodDeviceCGroupFullPath ¶
GetK8sPodDeviceCGroupFullPath Obtain the full path of the cgroup device subsystem of the pod.
func GetNumaInformation ¶
func GetPredicateTimeOfPod ¶
func GetResourceOfContainer ¶
func GetResourceOfContainer(container *corev1.Container, resourceName corev1.ResourceName) int
GetResourceOfContainer returns the container's limit for the given resource.
func GetResourceOfPod ¶
func GetResourceOfPod(pod *corev1.Pod, resourceName corev1.ResourceName) int
GetResourceOfPod returns the limit of the given resource for all containers of the Pod.
func InitializeCGroupDriver ¶
func InitializeCGroupDriver(cgroupDriver string)
func InsertAnnotation ¶
func IsShouldDeletePod ¶
IsShouldDeletePod Determine whether the pod has been deleted or needs to be deleted.
func IsVGPUEnabledNode ¶
IsVGPUEnabledNode Determine whether there are VGPU devices on the node.
func IsVGPURequiredContainer ¶
IsVGPURequiredContainer tells whether the container requests vGPU resources.
func IsVGPUResourcePod ¶
IsVGPUResourcePod Determine if a pod has vGPU resource request.
func ParseContainerRuntime ¶
func PathIsNotExist ¶
func PodIsTerminated ¶
func ShouldRetry ¶
ShouldRetry Determine whether the error of apiserver is of the type that needs to be retried.
func SplitK8sCGroupBasePath ¶
Types ¶
type AssignedPhase ¶
type AssignedPhase string
const ( AssignPhaseSucceed AssignedPhase = "succeed" AssignPhaseAllocating AssignedPhase = "allocating" AssignPhaseFailed AssignedPhase = "failed" )
type CGroupDriver ¶
type CGroupDriver string
type CgroupName ¶
type CgroupName []string
func NewPodCgroupName ¶
func NewPodCgroupName(pod *corev1.Pod) CgroupName
func (CgroupName) ToCgroupfs ¶
func (cgroupName CgroupName) ToCgroupfs() string
func (CgroupName) ToSystemd ¶
func (cgroupName CgroupName) ToSystemd() string
cgroupName.ToSystemd converts the internal cgroup name to a systemd name. For example, the name {"kubepods", "burstable", "pod1234-abcd-5678-efgh"} becomes "/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod1234_abcd_5678_efgh.slice" This function always expands the systemd name into the cgroupfs form. If only the last part is needed, use path.Base(...) on it to discard the rest.
type ComputePolicy ¶
type ComputePolicy string
const ( // NoneComputePolicy There are no computing power limitations, tasks compete for GPUs on their own. NoneComputePolicy ComputePolicy = "none" // BalanceComputePolicy Automatically balance GPU load, maximize GPU utilization, // allocate idle computing power to ongoing tasks, // and roll back excess computing power when new tasks require it. BalanceComputePolicy ComputePolicy = "balance" // FixedComputePolicy Run tasks with a fixed computing power quota, // and the utilization rate will not exceed the quota. // Default strategy FixedComputePolicy ComputePolicy = "fixed" // default )
type PodsOrderedByPredicateTime ¶
func (PodsOrderedByPredicateTime) Len ¶
func (pods PodsOrderedByPredicateTime) Len() int
func (PodsOrderedByPredicateTime) Less ¶
func (pods PodsOrderedByPredicateTime) Less(i, j int) bool
func (PodsOrderedByPredicateTime) Swap ¶
func (pods PodsOrderedByPredicateTime) Swap(i, j int)
type SchedulerPolicy ¶
type SchedulerPolicy string
const ( NonePolicy SchedulerPolicy = "none" // BinpackPolicy means the less device memory remains after this allocation, the better BinpackPolicy SchedulerPolicy = "binpack" // SpreadPolicy means it is better to put this task onto an idle GPU card than onto a shared GPU card SpreadPolicy SchedulerPolicy = "spread" )
type TopologyMode ¶ added in v0.3.0
type TopologyMode string
const ( // NoneTopology Do not use any topology mode to allocate devices. NoneTopology TopologyMode = "none" // NUMATopology aligns the allocated devices according to numa nodes. NUMATopology TopologyMode = "numa" // LinkTopology finds the best device set based on link topology. LinkTopology TopologyMode = "link" )