Documentation
¶
Index ¶
- Constants
- Variables
- func BuildKubeConfig(sConfig *Config) *rest.Config
- type AffinityGroupMemberBindInfo
- type AffinityGroupMemberSpec
- type AffinityGroupSpec
- type CellAddress
- type CellType
- type CellTypeSpec
- type Config
- type PhysicalCellSpec
- type PhysicalClusterSpec
- type PodBindInfo
- type PodPlacementInfo
- type PodSchedulingSpec
- type ReservationId
- type ReservedCellSpec
- type VirtualCellSpec
- type VirtualClusterName
- type VirtualClusterSpec
- type WebServerError
- type WebServerPaths
Constants ¶
// General constants of the scheduler: component identity, defaults, and the
// resource name / annotation keys / env name through which Pods interact with it.
const (
	ComponentName = "hivedscheduler"
	// GroupName prefixes the resource name and the annotation keys declared below.
	GroupName = "hivedscheduler.microsoft.com"

	DefaultConfigFilePath = "./hivedscheduler.yaml"
	UnlimitedValue        = -1

	// To leverage this scheduler, at least one container in the Pod should contain
	// the resource limit below with any positive int16 value.
	ResourceNamePodSchedulingEnable = GroupName + "/pod-scheduling-enable"

	// To leverage this scheduler, the Pod should contain the annotation below in
	// PodSchedulingSpec YAML format.
	AnnotationKeyPodSchedulingSpec = GroupName + "/pod-scheduling-spec"

	// To leverage this scheduler, if one container in the Pod wants to use the
	// allocated GPUs for the whole Pod, it should contain the env below.
	// env:
	// - name: NVIDIA_VISIBLE_DEVICES
	//   valueFrom:
	//     fieldRef:
	//       fieldPath: metadata.annotations['hivedscheduler.microsoft.com/pod-gpu-isolation']
	// The annotation referred to by the env will be populated by the scheduler
	// when binding the pod.
	//
	// Notes:
	// 1. The scheduler directly delivers the GPU isolation decision to
	//    nvidia-container-runtime through the Pod env NVIDIA_VISIBLE_DEVICES.
	// 2. If multiple containers in the Pod contain the env, the allocated GPUs are
	//    all visible to them, so it is these containers' freedom to control how
	//    to share these GPUs.
	EnvNameNvidiaVisibleDevices  = "NVIDIA_VISIBLE_DEVICES"
	AnnotationKeyPodGpuIsolation = GroupName + "/pod-gpu-isolation"

	// Populated by this scheduler, used to track and recover allocated placement.
	// It is in PodBindInfo YAML format.
	AnnotationKeyPodBindInfo = GroupName + "/pod-bind-info"

	// Lowest priority of regular pods. A pod with a priority >= 0 will be considered
	// as a guaranteed one; opportunistic otherwise.
	RegularPriority = int32(0)
)
///////////////////////////////////////////////////////////////////////////////////// General Constants /////////////////////////////////////////////////////////////////////////////////////
// URL paths served by the web server. Each path is built from the one above it,
// so the whole tree hangs off RootPath + "v1".
const (
	RootPath    = "/"
	VersionPath = RootPath + "v1"

	// Scheduler Extender API: API with K8S Default Scheduler
	ExtenderPath = VersionPath + "/extender"
	FilterPath   = ExtenderPath + "/filter"
	BindPath     = ExtenderPath + "/bind"
	PreemptPath  = ExtenderPath + "/preempt"
)
///////////////////////////////////////////////////////////////////////////////////// WebServer Constants /////////////////////////////////////////////////////////////////////////////////////
Variables ¶
// Defaults derived from the process environment. As package-level variables,
// their initializers read the environment once, during package initialization.
var (
	// DefaultKubeConfigFilePath is the value of HOME followed by "/.kube/config".
	DefaultKubeConfigFilePath = os.Getenv("HOME") + "/.kube/config"

	// EnvValueKubeApiServerAddress is the value of KUBE_APISERVER_ADDRESS
	// (empty when the variable is unset).
	EnvValueKubeApiServerAddress = os.Getenv("KUBE_APISERVER_ADDRESS")

	// EnvValueKubeConfigFilePath is the value of KUBECONFIG
	// (empty when the variable is unset).
	EnvValueKubeConfigFilePath = os.Getenv("KUBECONFIG")
)
Functions ¶
func BuildKubeConfig ¶
Types ¶
type AffinityGroupMemberBindInfo ¶ added in v0.2.0
// AffinityGroupMemberBindInfo wraps a list of PodPlacementInfo entries.
// PodBindInfo.AffinityGroupBindInfo holds a slice of these.
type AffinityGroupMemberBindInfo struct {
// Pod placements, (de)serialized under the YAML key "podPlacements".
PodPlacements []PodPlacementInfo `yaml:"podPlacements"`
}
type AffinityGroupMemberSpec ¶ added in v0.2.0
type AffinityGroupSpec ¶ added in v0.2.0
// AffinityGroupSpec names an affinity group and lists its members.
// PodSchedulingSpec.AffinityGroup points to one of these.
type AffinityGroupSpec struct {
// Name of the affinity group (YAML key "name").
Name string `yaml:"name"`
// Member specs of the group (YAML key "members").
Members []AffinityGroupMemberSpec `yaml:"members"`
}
type CellAddress ¶
// CellAddress is a string-based type used for the CellAddress field of PhysicalCellSpec.
type CellAddress string
///////////////////////////////////////////////////////////////////////////////////// General Types /////////////////////////////////////////////////////////////////////////////////////
type CellType ¶
// CellType is a string-based type that keys PhysicalClusterSpec.CellTypes and
// tags each PhysicalCellSpec.
type CellType string
///////////////////////////////////////////////////////////////////////////////////// General Types /////////////////////////////////////////////////////////////////////////////////////
type CellTypeSpec ¶
type Config ¶
// Config is the scheduler configuration, (de)serialized as YAML.
// Every field is a pointer; presumably so an unset key can be told apart from
// a zero value during defaulting — confirm against the defaulting code.
type Config struct {
// KubeApiServerAddress defaults to ${KUBE_APISERVER_ADDRESS}.
// KubeConfigFilePath defaults to ${KUBECONFIG}, then falls back to ${HOME}/.kube/config.
//
// If both KubeApiServerAddress and KubeConfigFilePath are still empty after defaulting, falls back to the
// [k8s inClusterConfig](https://kubernetes.io/docs/tasks/access-application-cluster/access-cluster/#accessing-the-api-from-a-pod).
//
// If both KubeApiServerAddress and KubeConfigFilePath are non-empty after defaulting,
// KubeApiServerAddress overrides the server address specified in the file referred to by KubeConfigFilePath.
//
// If only KubeApiServerAddress is non-empty after defaulting, it should be an insecure ApiServer address (can be obtained from
// [Insecure ApiServer](https://kubernetes.io/docs/reference/access-authn-authz/controlling-access/#api-server-ports-and-ips) or
// [kubectl proxy](https://kubernetes.io/docs/tasks/access-application-cluster/access-cluster/#using-kubectl-proxy))
// which does not enforce authentication.
//
// If only KubeConfigFilePath is non-empty after defaulting, it should be a valid
// [KubeConfig File](https://kubernetes.io/docs/tasks/access-application-cluster/configure-access-multiple-clusters/#explore-the-home-kube-directory)
// which inlines or refers to the valid
// [ApiServer Credential Files](https://kubernetes.io/docs/reference/access-authn-authz/controlling-access/#transport-security).
//
// Address should be in the format http[s]://host:port
KubeApiServerAddress *string `yaml:"kubeApiServerAddress"`
KubeConfigFilePath *string `yaml:"kubeConfigFilePath"`
// WebServer
// Defaults to :9096
WebServerAddress *string `yaml:"webServerAddress"`
// Specifies a threshold for PodBindAttempts; once it is exceeded, an extra
// Pod binding will be executed forcefully.
ForcePodBindThreshold *int32 `yaml:"forcePodBindThreshold"`
// Specifies the whole physical cluster
// TODO: Automatically construct it based on node info from GPU and Network Device Plugins
PhysicalCluster *PhysicalClusterSpec `yaml:"physicalCluster"`
// Specifies all the virtual clusters belonging to the physical cluster
VirtualClusters *map[VirtualClusterName]VirtualClusterSpec `yaml:"virtualClusters"`
}
type PhysicalCellSpec ¶
// PhysicalCellSpec specifies one physical cell instance and, recursively,
// its child cells.
type PhysicalCellSpec struct {
// Type of this cell (YAML key "cellType").
CellType CellType `yaml:"cellType"`
// Address of this cell (YAML key "cellAddress").
CellAddress CellAddress `yaml:"cellAddress"`
// Reservation id attached to this cell (YAML key "reservationId").
ReservationId ReservationId `yaml:"reservationId"`
// Nested child cells; the key is omitted from YAML output when the slice is empty.
CellChildren []PhysicalCellSpec `yaml:"cellChildren,omitempty"`
}
Specifies a physical cell instance.
type PhysicalClusterSpec ¶
// PhysicalClusterSpec is the physical cluster definition: the known cell types
// plus the physical cell instances.
type PhysicalClusterSpec struct {
// Maps each CellType to its CellTypeSpec (YAML key "cellTypes").
CellTypes map[CellType]CellTypeSpec `yaml:"cellTypes"`
// Physical cell instances; each entry may nest children (YAML key "physicalCells").
PhysicalCells []PhysicalCellSpec `yaml:"physicalCells"`
}
Physical cluster definition
type PodBindInfo ¶
// PodBindInfo records where a pod was bound. It is used to recover the
// resources allocated by the scheduler.
type PodBindInfo struct {
// The node to bind
Node string `yaml:"node"`
// The GPUs to bind
GpuIsolation []int32 `yaml:"gpuIsolation"`
// YAML key "cellChain". NOTE(review): presumably identifies the cell chain the
// pod was placed in — confirm against the scheduling algorithm.
CellChain string `yaml:"cellChain"`
// One AffinityGroupMemberBindInfo per entry (YAML key "affinityGroupBindInfo").
AffinityGroupBindInfo []AffinityGroupMemberBindInfo `yaml:"affinityGroupBindInfo"`
}
Used to recover the resources allocated by the scheduler.
type PodPlacementInfo ¶ added in v0.2.0
// PodPlacementInfo describes one pod placement: a physical node, GPU indices,
// and preassigned cell levels.
type PodPlacementInfo struct {
// Physical node name (YAML key "physicalNode").
PhysicalNode string `yaml:"physicalNode"`
// Physical GPU indices (YAML key "physicalGpuIndices").
PhysicalGpuIndices []int32 `yaml:"physicalGpuIndices"`
// Levels of the preassigned cells used by the pods. Used to locate the virtual cells
// when adding an allocated pod.
PreassignedCellLevels []int32 `yaml:"preassignedCellLevels"`
}
type PodSchedulingSpec ¶
// PodSchedulingSpec is the per-pod scheduling request, (de)serialized as YAML.
type PodSchedulingSpec struct {
// Name of the virtual cluster (YAML key "virtualCluster").
VirtualCluster VirtualClusterName `yaml:"virtualCluster"`
// Pod priority (YAML key "priority").
Priority int32 `yaml:"priority"`
// Reservation id (YAML key "reservationId").
ReservationId ReservationId `yaml:"reservationId"`
// GPU type (YAML key "gpuType").
GpuType string `yaml:"gpuType"`
// Number of GPUs (YAML key "gpuNumber").
GpuNumber int32 `yaml:"gpuNumber"`
// Affinity group spec; a pointer, so it may be nil.
// NOTE(review): presumably nil means the pod is in no group — confirm.
AffinityGroup *AffinityGroupSpec `yaml:"affinityGroup"`
}
type ReservationId ¶
// ReservationId is a string-based type used by the ReservationId fields of
// PhysicalCellSpec, PodSchedulingSpec and ReservedCellSpec.
type ReservationId string
///////////////////////////////////////////////////////////////////////////////////// General Types /////////////////////////////////////////////////////////////////////////////////////
type ReservedCellSpec ¶
// ReservedCellSpec refers to a reserved cell by its reservation id.
// VirtualClusterSpec.ReservedCells holds a slice of these.
type ReservedCellSpec struct {
// Reservation id (YAML key "reservationId"); PhysicalCellSpec carries a field of the same type.
ReservationId ReservationId `yaml:"reservationId"`
}
type VirtualCellSpec ¶
type VirtualClusterSpec ¶
// VirtualClusterSpec lists the virtual cells and reserved cells of one
// virtual cluster. Config.VirtualClusters maps VirtualClusterName to it.
type VirtualClusterSpec struct {
// Virtual cells of the cluster (YAML key "virtualCells").
VirtualCells []VirtualCellSpec `yaml:"virtualCells"`
// Reserved cells; the key is omitted from YAML output when the slice is empty.
ReservedCells []ReservedCellSpec `yaml:"reservedCells,omitempty"`
}
type WebServerError ¶
func NewWebServerError ¶
func NewWebServerError(code int, message string) *WebServerError
func (*WebServerError) Error ¶
func (err *WebServerError) Error() string
type WebServerPaths ¶
// WebServerPaths wraps a list of path strings, (de)serialized as JSON.
type WebServerPaths struct {
// Path strings (JSON key "paths").
Paths []string `json:"paths"`
}