Documentation
¶
Overview ¶
Package v1 contains API Schema definitions for the tensor-fusion.ai v1 API group. +kubebuilder:object:generate=true +groupName=tensor-fusion.ai
Index ¶
- Constants
- Variables
- type AdjustRequest
- type AllocRequest
- type AuthTypeEnum
- type AutoFreeze
- type AutoFreezeAndResume
- type AutoScalingConfig
- type AutoSetLimits
- type AutoSetReplicas
- type AutoSetRequests
- type BudgetExceedStrategy
- type CapacityConfig
- type ClientConfig
- type ComponentConfig
- type ComputingVendorConfig
- type ComputingVendorName
- type ComputingVendorParams
- type GPU
- type GPUAllocationInfo
- type GPUFilter
- type GPUList
- type GPUNode
- type GPUNodeClass
- type GPUNodeClassList
- type GPUNodeClassSpec
- type GPUNodeClassStatus
- type GPUNodeInfo
- type GPUNodeList
- type GPUNodeManageMode
- type GPUNodeSpec
- type GPUNodeStatus
- type GPUOrCPUResourceUnit
- type GPUPool
- type GPUPoolDefinition
- type GPUPoolList
- type GPUPoolSpec
- type GPUPoolStatus
- type GPUResourceAvailablePercent
- type GPUResourcePricingUnit
- type GPUResourceQuota
- type GPUResourceQuotaConditionType
- type GPUResourceQuotaList
- type GPUResourceQuotaSingle
- type GPUResourceQuotaSpec
- type GPUResourceQuotaStatus
- type GPUResourceQuotaTotal
- type GPUResourceUnit
- type GPUResourceUsage
- type GPUStatus
- type HypervisorConfig
- type HypervisorScheduling
- type MaintenanceWindow
- type MultiProcessQueuing
- type NameNamespace
- type NodeClassBlockDeviceMappings
- type NodeClassBlockDeviceSettings
- type NodeClassItemSelectorTerms
- type NodeClassMetadataOptions
- type NodeCompaction
- type NodeDiscoveryConfig
- type NodeHypervisorStatus
- type NodeManagerConfig
- type NodeProvisioner
- type NodeRequirementKey
- type NodeRollingUpdatePolicy
- type OSImageTypeEnum
- type Oversubscription
- type PeriodicalBudget
- type PlacementConfig
- type PlacementMode
- type PoolComponentStatus
- type PoolProvisioningStatus
- type ProvisioningMode
- type QoSLevel
- type QosConfig
- type QosDefinition
- type QosPricing
- type ReBalanceThreshold
- type ReBalancerConfig
- type Requirement
- type Resource
- type Resources
- type RunningAppDetail
- type SchedulingConfigTemplate
- type SchedulingConfigTemplateList
- type SchedulingConfigTemplateSpec
- type SchedulingConfigTemplateStatus
- type SmartSchedulerModelInput
- type Taint
- type TensorFusionCluster
- func (in *TensorFusionCluster) DeepCopy() *TensorFusionCluster
- func (in *TensorFusionCluster) DeepCopyInto(out *TensorFusionCluster)
- func (in *TensorFusionCluster) DeepCopyObject() runtime.Object
- func (tfc *TensorFusionCluster) RefreshStatus(ownedPools []GPUPool)
- func (tfc *TensorFusionCluster) SetAsPending()
- func (tfc *TensorFusionCluster) SetAsReady(conditions ...metav1.Condition) bool
- func (tfc *TensorFusionCluster) SetAsUnknown(err error) bool
- func (tfc *TensorFusionCluster) SetAsUpdating(conditions ...metav1.Condition) bool
- type TensorFusionClusterList
- type TensorFusionClusterPhase
- type TensorFusionClusterSpec
- type TensorFusionClusterStatus
- type TensorFusionConnection
- type TensorFusionConnectionList
- type TensorFusionConnectionSpec
- type TensorFusionConnectionStatus
- type TensorFusionGPUNodePhase
- type TensorFusionGPUPhase
- type TensorFusionPoolPhase
- type TensorFusionWorkload
- type TensorFusionWorkloadList
- type TensorFusionWorkloadPhase
- type TensorFusionWorkloadStatus
- type UsedBySystem
- type WorkerConfig
- type WorkerPhase
- type WorkerStatus
- type WorkloadProfile
- type WorkloadProfileList
- type WorkloadProfileSpec
- type WorkloadProfileStatus
Constants ¶
const ( TensorFusionPoolPhasePending = TensorFusionPoolPhase(constants.PhasePending) TensorFusionPoolPhaseRunning = TensorFusionPoolPhase(constants.PhaseRunning) TensorFusionPoolPhaseUpdating = TensorFusionPoolPhase(constants.PhaseUpdating) TensorFusionPoolPhaseUnknown = TensorFusionPoolPhase(constants.PhaseUnknown) TensorFusionPoolPhaseDestroying = TensorFusionPoolPhase(constants.PhaseDestroying) )
const ( TensorFusionClusterPending = TensorFusionClusterPhase(constants.PhasePending) TensorFusionClusterRunning = TensorFusionClusterPhase(constants.PhaseRunning) TensorFusionClusterUpdating = TensorFusionClusterPhase(constants.PhaseUpdating) TensorFusionClusterDestroying = TensorFusionClusterPhase(constants.PhaseDestroying) TensorFusionClusterUnknown = TensorFusionClusterPhase(constants.PhaseUnknown) )
Variables ¶
var ( // GroupVersion is group version used to register these objects. GroupVersion = schema.GroupVersion{Group: "tensor-fusion.ai", Version: "v1"} // SchemeBuilder is used to add go types to the GroupVersionKind scheme. SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion} // AddToScheme adds the types in this group-version to the given scheme. AddToScheme = SchemeBuilder.AddToScheme )
Functions ¶
This section is empty.
Types ¶
type AdjustRequest ¶ added in v1.35.0
func (*AdjustRequest) DeepCopy ¶ added in v1.35.0
func (in *AdjustRequest) DeepCopy() *AdjustRequest
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AdjustRequest.
func (*AdjustRequest) DeepCopyInto ¶ added in v1.35.0
func (in *AdjustRequest) DeepCopyInto(out *AdjustRequest)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type AllocRequest ¶ added in v1.35.0
type AllocRequest struct { // Name of the GPU pool to allocate from PoolName string // Namespace information for the workload WorkloadNameNamespace NameNamespace // Resource requirements for the allocation Request Resource Limit Resource // Number of GPUs to allocate Count uint // Specific GPU model to allocate, empty string means any model GPUModel string // Node affinity requirements NodeAffinity *v1.NodeAffinity // final scheduled GPU IDs for this allocation request // This field is set by GPUAllocator, user should not choose specific GPUs GPUNames []string // record the pod meta for quota check PodMeta metav1.ObjectMeta }
func (*AllocRequest) Clone ¶ added in v1.35.0
func (ar *AllocRequest) Clone() framework.StateData
func (*AllocRequest) DeepCopy ¶ added in v1.35.0
func (in *AllocRequest) DeepCopy() *AllocRequest
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AllocRequest.
func (*AllocRequest) DeepCopyInto ¶ added in v1.35.0
func (in *AllocRequest) DeepCopyInto(out *AllocRequest)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type AuthTypeEnum ¶
type AuthTypeEnum string
+kubebuilder:validation:Enum=accessKey;serviceAccountRole
const ( AuthTypeAccessKey AuthTypeEnum = "accessKey" AuthTypeServiceAccountRole AuthTypeEnum = "serviceAccountRole" )
type AutoFreeze ¶
type AutoFreeze struct { Qos QoSLevel `json:"qos,omitempty"` FreezeToMemTTL string `json:"freezeToMemTTL,omitempty"` FreezeToDiskTTL string `json:"freezeToDiskTTL,omitempty"` Enable *bool `json:"enable,omitempty"` }
func (*AutoFreeze) DeepCopy ¶
func (in *AutoFreeze) DeepCopy() *AutoFreeze
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AutoFreeze.
func (*AutoFreeze) DeepCopyInto ¶
func (in *AutoFreeze) DeepCopyInto(out *AutoFreeze)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type AutoFreezeAndResume ¶ added in v1.35.0
type AutoFreezeAndResume struct { AutoFreeze []AutoFreeze `json:"autoFreeze,omitempty"` IntelligenceWarmup SmartSchedulerModelInput `json:"intelligenceWarmup,omitempty"` }
func (*AutoFreezeAndResume) DeepCopy ¶ added in v1.35.0
func (in *AutoFreezeAndResume) DeepCopy() *AutoFreezeAndResume
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AutoFreezeAndResume.
func (*AutoFreezeAndResume) DeepCopyInto ¶ added in v1.35.0
func (in *AutoFreezeAndResume) DeepCopyInto(out *AutoFreezeAndResume)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type AutoScalingConfig ¶
type AutoScalingConfig struct { // layer 1 vertical auto-scaling, turbo burst to existing GPU cards quickly // VPA-like, aggregate metrics data <1m AutoSetLimits AutoSetLimits `json:"autoSetLimits,omitempty"` // layer 2 horizontal auto-scaling, scale up to more GPU cards if max limits threshold hit // HPA-like, aggregate metrics data 1m-1h (when tf-worker scaled-up, should also trigger client pod's owner[Deployment etc.]'s replica increasing, check if KNative works) AutoSetReplicas AutoSetReplicas `json:"autoSetReplicas,omitempty"` // layer 3 adjusting, to match the actual usage in the long run, only for N:M remote vGPU mode, not impl yet // Adjust baseline requests to match the actual usage in longer period, such as 1day - 2weeks AutoSetRequests AutoSetRequests `json:"autoSetRequests,omitempty"` }
func (*AutoScalingConfig) DeepCopy ¶
func (in *AutoScalingConfig) DeepCopy() *AutoScalingConfig
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AutoScalingConfig.
func (*AutoScalingConfig) DeepCopyInto ¶
func (in *AutoScalingConfig) DeepCopyInto(out *AutoScalingConfig)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type AutoSetLimits ¶
type AutoSetLimits struct { Enable bool `json:"enable,omitempty"` // target resource to scale limits, such as "tflops", "vram", or "all" by default TargetResource string `json:"targetResource,omitempty"` EvaluationPeriod string `json:"evaluationPeriod,omitempty"` ExtraTFlopsBufferRatio string `json:"extraTFlopsBufferRatio,omitempty"` IgnoredDeltaRange string `json:"ignoredDeltaRange,omitempty"` ScaleUpStep string `json:"scaleUpStep,omitempty"` // the multiplier of requests, to avoid limit set too high, like 5.0 MaxRatioToRequests string `json:"maxRatioToRequests,omitempty"` Prediction *SmartSchedulerModelInput `json:"prediction,omitempty"` }
A typical autoLimits algorithm could check every 5m, look back over 1 day of data, select 99% of actual usage as preferredLimits, and calculate finalPreferredLimits, which is preferredLimits*(1+extraBufferRatio). If finalPreferredLimits and the current limits are equal with each other within a range (e.g. 5%), do nothing. If finalPreferredLimits is less than the current limits and exceeds the error range, set the current limits to finalPreferredLimits. If finalPreferredLimits > current limits and exceeds the error range, set the current limits to max(finalPreferredLimits, current limits * scaleUpStep). If AI prediction is enabled, it helps to detect historical patterns and set a more reasonable, explainable limit value; the final limits should then be max(finalPreferredLimits, last(predict_value * (1 + extraTFlopsBufferRatio))).
func (*AutoSetLimits) DeepCopy ¶
func (in *AutoSetLimits) DeepCopy() *AutoSetLimits
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AutoSetLimits.
func (*AutoSetLimits) DeepCopyInto ¶
func (in *AutoSetLimits) DeepCopyInto(out *AutoSetLimits)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type AutoSetReplicas ¶
type AutoSetReplicas struct { Enable bool `json:"enable,omitempty"` TargetTFlopsOfLimits string `json:"targetTFlopsOfLimits,omitempty"` EvaluationPeriod string `json:"evaluationPeriod,omitempty"` ScaleUpStep string `json:"scaleUpStep,omitempty"` ScaleDownStep string `json:"scaleDownStep,omitempty"` ScaleUpCoolDownTime string `json:"scaleUpCoolDownTime,omitempty"` ScaleDownCoolDownTime string `json:"scaleDownCoolDownTime,omitempty"` }
To handle burst traffic, scale up in a short time (this feature requires GPU context migration & replication, and is not available yet).
func (*AutoSetReplicas) DeepCopy ¶
func (in *AutoSetReplicas) DeepCopy() *AutoSetReplicas
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AutoSetReplicas.
func (*AutoSetReplicas) DeepCopyInto ¶
func (in *AutoSetReplicas) DeepCopyInto(out *AutoSetReplicas)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type AutoSetRequests ¶
type AutoSetRequests struct { Enable bool `json:"enable,omitempty"` // target resource to scale requests, such as "tflops", "vram", or "all" by default TargetResource string `json:"targetResource,omitempty"` PercentileForAutoRequests string `json:"percentileForAutoRequests,omitempty"` // the request buffer ratio, for example actual usage is 1.0, 10% buffer will be 1.1 as final preferred requests ExtraBufferRatio string `json:"extraBufferRatio,omitempty"` EvaluationPeriod string `json:"evaluationPeriod,omitempty"` AggregationPeriod string `json:"aggregationPeriod,omitempty"` Prediction SmartSchedulerModelInput `json:"prediction,omitempty"` }
func (*AutoSetRequests) DeepCopy ¶
func (in *AutoSetRequests) DeepCopy() *AutoSetRequests
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AutoSetRequests.
func (*AutoSetRequests) DeepCopyInto ¶
func (in *AutoSetRequests) DeepCopyInto(out *AutoSetRequests)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type BudgetExceedStrategy ¶
type BudgetExceedStrategy string
+kubebuilder:validation:Enum=AlertOnly;AlertAndTerminateVM
const ( BudgetExceedStrategyAlertOnly BudgetExceedStrategy = "AlertOnly" BudgetExceedStrategyAlertAndTerminateVM BudgetExceedStrategy = "AlertAndTerminateVM" )
type CapacityConfig ¶
type CapacityConfig struct { // +optional MinResources *GPUOrCPUResourceUnit `json:"minResources,omitempty"` // +optional MaxResources *GPUOrCPUResourceUnit `json:"maxResources,omitempty"` // +optional WarmResources *GPUOrCPUResourceUnit `json:"warmResources,omitempty"` // +optional Oversubscription *Oversubscription `json:"oversubscription,omitempty"` }
func (*CapacityConfig) DeepCopy ¶
func (in *CapacityConfig) DeepCopy() *CapacityConfig
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CapacityConfig.
func (*CapacityConfig) DeepCopyInto ¶
func (in *CapacityConfig) DeepCopyInto(out *CapacityConfig)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type ClientConfig ¶
type ClientConfig struct { RemoteModeImage string `json:"remoteModeImage,omitempty"` EmbeddedModeImage string `json:"embeddedModeImage,omitempty"` OperatorEndpoint string `json:"operatorEndpoint,omitempty"` // +optional PatchToPod *runtime.RawExtension `json:"patchToPod,omitempty"` // +optional PatchToContainer *runtime.RawExtension `json:"patchToContainer,omitempty"` // +optional PatchToEmbeddedWorkerContainer *runtime.RawExtension `json:"patchToEmbeddedWorkerContainer,omitempty"` // +optional PatchEmbeddedWorkerToPod *runtime.RawExtension `json:"patchEmbeddedWorkerToPod,omitempty"` }
func (*ClientConfig) DeepCopy ¶
func (in *ClientConfig) DeepCopy() *ClientConfig
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClientConfig.
func (*ClientConfig) DeepCopyInto ¶
func (in *ClientConfig) DeepCopyInto(out *ClientConfig)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type ComponentConfig ¶
type ComponentConfig struct { // +optional Worker *WorkerConfig `json:"worker,omitempty"` // +optional Hypervisor *HypervisorConfig `json:"hypervisor,omitempty"` // +optional NodeDiscovery *NodeDiscoveryConfig `json:"nodeDiscovery,omitempty"` // +optional Client *ClientConfig `json:"client,omitempty"` }
Customize system components for seamless onboarding.
func (*ComponentConfig) DeepCopy ¶
func (in *ComponentConfig) DeepCopy() *ComponentConfig
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ComponentConfig.
func (*ComponentConfig) DeepCopyInto ¶
func (in *ComponentConfig) DeepCopyInto(out *ComponentConfig)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type ComputingVendorConfig ¶
type ComputingVendorConfig struct { Name string `json:"name,omitempty"` // support popular cloud providers Type ComputingVendorName `json:"type,omitempty"` AuthType AuthTypeEnum `json:"authType,omitempty"` // Authentication type (e.g., accessKey, serviceAccount). // +optional // +kubebuilder:default=true Enable *bool `json:"enable,omitempty"` // Enable or disable the computing vendor. Params ComputingVendorParams `json:"params,omitempty"` }
ComputingVendorConfig defines the Cloud vendor connection such as AWS, GCP, Azure etc.
func (*ComputingVendorConfig) DeepCopy ¶
func (in *ComputingVendorConfig) DeepCopy() *ComputingVendorConfig
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ComputingVendorConfig.
func (*ComputingVendorConfig) DeepCopyInto ¶
func (in *ComputingVendorConfig) DeepCopyInto(out *ComputingVendorConfig)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type ComputingVendorName ¶
type ComputingVendorName string
+kubebuilder:validation:Enum=aws;lambda-labs;gcp;azure;oracle-oci;ibm;openshift;vultr;together-ai;alibaba;nvidia;tencent;runpod;karpenter;mock
const ( ComputingVendorAWS ComputingVendorName = "aws" ComputingVendorGCP ComputingVendorName = "gcp" ComputingVendorAzure ComputingVendorName = "azure" ComputingVendorOracle ComputingVendorName = "oracle-oci" ComputingVendorIBM ComputingVendorName = "ibm" ComputingVendorOpenShift ComputingVendorName = "openshift" ComputingVendorVultr ComputingVendorName = "vultr" ComputingVendorTogetherAI ComputingVendorName = "together-ai" ComputingVendorLambdaLabs ComputingVendorName = "lambda-labs" ComputingVendorAlibaba ComputingVendorName = "alibaba" ComputingVendorNvidia ComputingVendorName = "nvidia" ComputingVendorTencent ComputingVendorName = "tencent" ComputingVendorRunPod ComputingVendorName = "runpod" ComputingVendorKarpenter ComputingVendorName = "karpenter" // This is for unit/integration testing only, no cloud provider is involved ComputingVendorMock ComputingVendorName = "mock" )
type ComputingVendorParams ¶
type ComputingVendorParams struct { // +optional DefaultRegion string `json:"defaultRegion,omitempty"` // Region for the computing vendor. // the secret of access key and secret key or config file, must be mounted as file path // +optional AccessKeyPath string `json:"accessKeyPath,omitempty"` // +optional SecretKeyPath string `json:"secretKeyPath,omitempty"` // preferred IAM role since it's more secure // +optional IAMRole string `json:"iamRole,omitempty"` // +optional ConfigFile string `json:"configFile,omitempty"` // +optional // User can set extra cloud vendor params, eg. // in ali cloud: spotPriceLimit, spotDuration, spotInterruptionBehavior, systemDiskCategory, systemDiskSize, dataDiskPerformanceLevel // in aws cloud: TODO ExtraParams map[string]string `json:"extraParams,omitempty"` }
func (*ComputingVendorParams) DeepCopy ¶
func (in *ComputingVendorParams) DeepCopy() *ComputingVendorParams
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ComputingVendorParams.
func (*ComputingVendorParams) DeepCopyInto ¶
func (in *ComputingVendorParams) DeepCopyInto(out *ComputingVendorParams)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type GPU ¶
type GPU struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata,omitempty"` Status GPUStatus `json:"status,omitempty"` }
+kubebuilder:object:root=true +kubebuilder:subresource:status +kubebuilder:resource:scope=Cluster +kubebuilder:printcolumn:name="GPU Model",type="string",JSONPath=".spec.gpuModel" +kubebuilder:printcolumn:name="Phase",type="string",JSONPath=".status.phase" +kubebuilder:printcolumn:name="Total TFlops",type="string",JSONPath=".status.capacity.tflops" +kubebuilder:printcolumn:name="Total VRAM",type="string",JSONPath=".status.capacity.vram" +kubebuilder:printcolumn:name="Available TFlops",type="string",JSONPath=".status.available.tflops" +kubebuilder:printcolumn:name="Available VRAM",type="string",JSONPath=".status.available.vram" +kubebuilder:printcolumn:name="Device UUID",type="string",JSONPath=".status.uuid" +kubebuilder:printcolumn:name="Used By",type="string",JSONPath=".status.usedBy" +kubebuilder:printcolumn:name="Node",type="string",JSONPath=".status.nodeSelector" GPU is the Schema for the gpus API.
func (*GPU) DeepCopy ¶
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPU.
func (*GPU) DeepCopyInto ¶
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*GPU) DeepCopyObject ¶
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type GPUAllocationInfo ¶ added in v1.39.1
type GPUAllocationInfo struct { Request Resource `json:"request,omitempty"` Limit Resource `json:"limit,omitempty"` PodName string `json:"podName,omitempty"` PodUID string `json:"podUID,omitempty"` Namespace string `json:"namespace,omitempty"` }
func (*GPUAllocationInfo) DeepCopy ¶ added in v1.39.1
func (in *GPUAllocationInfo) DeepCopy() *GPUAllocationInfo
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUAllocationInfo.
func (*GPUAllocationInfo) DeepCopyInto ¶ added in v1.39.1
func (in *GPUAllocationInfo) DeepCopyInto(out *GPUAllocationInfo)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type GPUFilter ¶
type GPUFilter struct { Type string `json:"type,omitempty"` Params runtime.RawExtension `json:"params,omitempty"` }
GPUFilter is to select eligible GPUs for scheduling.
example:
```yaml
- type: avoidTooMuchConnectionsOnSameGPU
  params:
    connectionNum: 150
- type: avoidDifferentZone
  params:
    # by default, GPU worker will be scheduled into the same zone as CPU Client Pod to align AZ and improve performance
    topologyKey: topology.kubernetes.io/zone
```
func (*GPUFilter) DeepCopy ¶
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUFilter.
func (*GPUFilter) DeepCopyInto ¶
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type GPUList ¶
type GPUList struct { metav1.TypeMeta `json:",inline"` metav1.ListMeta `json:"metadata,omitempty"` Items []GPU `json:"items"` }
GPUList contains a list of GPU.
func (*GPUList) DeepCopy ¶
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUList.
func (*GPUList) DeepCopyInto ¶
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*GPUList) DeepCopyObject ¶
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type GPUNode ¶
type GPUNode struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata,omitempty"` Spec GPUNodeSpec `json:"spec,omitempty"` Status GPUNodeStatus `json:"status,omitempty"` }
+kubebuilder:object:root=true +kubebuilder:subresource:status +kubebuilder:resource:scope=Cluster +kubebuilder:printcolumn:name="Phase",type="string",JSONPath=".status.phase" +kubebuilder:printcolumn:name="Total TFlops",type="string",JSONPath=".status.totalTFlops" +kubebuilder:printcolumn:name="Total VRAM",type="string",JSONPath=".status.totalVRAM" +kubebuilder:printcolumn:name="Virtual TFlops",type="string",JSONPath=".status.virtualTFlops" +kubebuilder:printcolumn:name="Virtual VRAM",type="string",JSONPath=".status.virtualVRAM" +kubebuilder:printcolumn:name="Available TFlops",type="string",JSONPath=".status.availableTFlops" +kubebuilder:printcolumn:name="Available VRAM",type="string",JSONPath=".status.availableVRAM" +kubebuilder:printcolumn:name="GPU Count",type="integer",JSONPath=".status.totalGPUs" GPUNode is the Schema for the gpunodes API.
func (*GPUNode) DeepCopy ¶
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUNode.
func (*GPUNode) DeepCopyInto ¶
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*GPUNode) DeepCopyObject ¶
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
func (*GPUNode) InitializeStatus ¶
func (*GPUNode) SetAnnotationToTriggerNodeSync ¶
func (node *GPUNode) SetAnnotationToTriggerNodeSync()
type GPUNodeClass ¶
type GPUNodeClass struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata,omitempty"` Spec GPUNodeClassSpec `json:"spec,omitempty"` Status GPUNodeClassStatus `json:"status,omitempty"` }
+kubebuilder:object:root=true +kubebuilder:subresource:status +kubebuilder:resource:scope=Cluster GPUNodeClass is the Schema for the gpunodeclasses API.
func (*GPUNodeClass) DeepCopy ¶
func (in *GPUNodeClass) DeepCopy() *GPUNodeClass
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUNodeClass.
func (*GPUNodeClass) DeepCopyInto ¶
func (in *GPUNodeClass) DeepCopyInto(out *GPUNodeClass)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*GPUNodeClass) DeepCopyObject ¶
func (in *GPUNodeClass) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type GPUNodeClassList ¶
type GPUNodeClassList struct { metav1.TypeMeta `json:",inline"` metav1.ListMeta `json:"metadata,omitempty"` Items []GPUNodeClass `json:"items"` }
GPUNodeClassList contains a list of GPUNodeClass.
func (*GPUNodeClassList) DeepCopy ¶
func (in *GPUNodeClassList) DeepCopy() *GPUNodeClassList
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUNodeClassList.
func (*GPUNodeClassList) DeepCopyInto ¶
func (in *GPUNodeClassList) DeepCopyInto(out *GPUNodeClassList)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*GPUNodeClassList) DeepCopyObject ¶
func (in *GPUNodeClassList) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type GPUNodeClassSpec ¶
type GPUNodeClassSpec struct { // +optional // The launch template to use for VM instances, if set, all other fields could be skipped LaunchTemplate NodeClassItemSelectorTerms `json:"launchTemplate"` // +optional // Could be private or public, varies in different cloud vendor, define where to query the OSImageID // +kubebuilder:default="Private" OSImageType OSImageTypeEnum `json:"osImageType,omitempty"` // the OS image identifier string, default to use first one, if not found, fallback to others OSImageSelectorTerms []NodeClassItemSelectorTerms `json:"osImageSelectorTerms,omitempty"` // +optional // The instance profile to use, assign IAM role and permissions for EC2 instances InstanceProfile string `json:"instanceProfile,omitempty"` // +optional // for AWS only, IMDSv2 metadata service options MetadataOptions *NodeClassMetadataOptions `json:"metadataOptions,omitempty"` // +optional SecurityGroupSelectorTerms []NodeClassItemSelectorTerms `json:"securityGroupSelectorTerms,omitempty"` // +optional SubnetSelectorTerms []NodeClassItemSelectorTerms `json:"subnetSelectorTerms,omitempty"` // Terms to select subnets // +optional BlockDeviceMappings []NodeClassBlockDeviceMappings `json:"blockDeviceMappings,omitempty"` // Block device mappings for the instance // +optional Tags map[string]string `json:"tags,omitempty"` // Tags associated with the resource // +optional UserData string `json:"userData,omitempty"` // User data script for the instance }
GPUNodeClassSpec defines the desired state of GPUNodeClass.
func (*GPUNodeClassSpec) DeepCopy ¶
func (in *GPUNodeClassSpec) DeepCopy() *GPUNodeClassSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUNodeClassSpec.
func (*GPUNodeClassSpec) DeepCopyInto ¶
func (in *GPUNodeClassSpec) DeepCopyInto(out *GPUNodeClassSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type GPUNodeClassStatus ¶
type GPUNodeClassStatus struct { }
GPUNodeClassStatus defines the observed state of GPUNodeClass.
func (*GPUNodeClassStatus) DeepCopy ¶
func (in *GPUNodeClassStatus) DeepCopy() *GPUNodeClassStatus
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUNodeClassStatus.
func (*GPUNodeClassStatus) DeepCopyInto ¶
func (in *GPUNodeClassStatus) DeepCopyInto(out *GPUNodeClassStatus)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type GPUNodeInfo ¶
type GPUNodeInfo struct { // +optional // only set when node is managed by TensorFusion InstanceID string `json:"instanceID,omitempty"` Region string `json:"region,omitempty"` Hostname string `json:"hostname,omitempty"` IP string `json:"ip,omitempty"` KernelVersion string `json:"kernelVersion,omitempty"` OSImage string `json:"osImage,omitempty"` GPUDriverVersion string `json:"gpuDriverVersion,omitempty"` GPUModel string `json:"gpuModel,omitempty"` GPUCount int32 `json:"gpuCount,omitempty"` OperatingSystem string `json:"operatingSystem,omitempty"` Architecture string `json:"architecture,omitempty"` // Additional space for L1/L2 VRAM buffer RAMSize resource.Quantity `json:"ramSize,omitempty"` DataDiskSize resource.Quantity `json:"dataDiskSize,omitempty"` }
func (*GPUNodeInfo) DeepCopy ¶
func (in *GPUNodeInfo) DeepCopy() *GPUNodeInfo
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUNodeInfo.
func (*GPUNodeInfo) DeepCopyInto ¶
func (in *GPUNodeInfo) DeepCopyInto(out *GPUNodeInfo)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type GPUNodeList ¶
type GPUNodeList struct { metav1.TypeMeta `json:",inline"` metav1.ListMeta `json:"metadata,omitempty"` Items []GPUNode `json:"items"` }
GPUNodeList contains a list of GPUNode.
func (*GPUNodeList) DeepCopy ¶
func (in *GPUNodeList) DeepCopy() *GPUNodeList
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUNodeList.
func (*GPUNodeList) DeepCopyInto ¶
func (in *GPUNodeList) DeepCopyInto(out *GPUNodeList)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*GPUNodeList) DeepCopyObject ¶
func (in *GPUNodeList) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type GPUNodeManageMode ¶
type GPUNodeManageMode string
+kubebuilder:validation:Enum=Manual;AutoSelect;Provisioned
const ( GPUNodeManageModeManual GPUNodeManageMode = "Manual" GPUNodeManageModeAutoSelect GPUNodeManageMode = "AutoSelect" GPUNodeManageModeProvisioned GPUNodeManageMode = "Provisioned" )
type GPUNodeSpec ¶
type GPUNodeSpec struct { // +kubebuilder:default=AutoSelect ManageMode GPUNodeManageMode `json:"manageMode,omitempty"` // +optional CostPerHour string `json:"costPerHour,omitempty"` // if not all GPU cards should be used, specify the GPU card indices, default to empty, // onboard all GPU cards to the pool // +optional GPUCardIndices []int `json:"gpuCardIndices,omitempty"` // +optional CloudVendorParam string `json:"cloudVendorParam,omitempty"` }
GPUNodeSpec defines the desired state of GPUNode.
func (*GPUNodeSpec) DeepCopy ¶
func (in *GPUNodeSpec) DeepCopy() *GPUNodeSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUNodeSpec.
func (*GPUNodeSpec) DeepCopyInto ¶
func (in *GPUNodeSpec) DeepCopyInto(out *GPUNodeSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type GPUNodeStatus ¶
type GPUNodeStatus struct { // the identifier of the kubernetes node, in nodeSelector mode, GPUNode name is the same as kubernetes node name because it's owned by the Kubernetes node, while in node provisioning mode owned by the GPUNode, and K8S Node name is uncontrollable KubernetesNodeName string `json:"kubernetesNodeName"` // +kubebuilder:default=Pending Phase TensorFusionGPUNodePhase `json:"phase"` // +optional Conditions []metav1.Condition `json:"conditions,omitempty"` TotalTFlops resource.Quantity `json:"totalTFlops"` TotalVRAM resource.Quantity `json:"totalVRAM"` VirtualTFlops resource.Quantity `json:"virtualTFlops"` VirtualVRAM resource.Quantity `json:"virtualVRAM"` AvailableTFlops resource.Quantity `json:"availableTFlops"` AvailableVRAM resource.Quantity `json:"availableVRAM"` // +optional VirtualAvailableTFlops *resource.Quantity `json:"virtualAvailableTFlops,omitempty"` // +optional VirtualAvailableVRAM *resource.Quantity `json:"virtualAvailableVRAM,omitempty"` // +optional HypervisorStatus NodeHypervisorStatus `json:"hypervisorStatus,omitempty"` // +optional NodeInfo GPUNodeInfo `json:"nodeInfo,omitempty"` // +optional LoadedModels *[]string `json:"loadedModels,omitempty"` TotalGPUs int32 `json:"totalGPUs"` ManagedGPUs int32 `json:"managedGPUs"` // +optional ManagedGPUDeviceIDs []string `json:"managedGPUDeviceIDs,omitempty"` ObservedGeneration int64 `json:"observedGeneration,omitempty"` // +optional AllocationInfo []*RunningAppDetail `json:"allocationInfo,omitempty"` }
GPUNodeStatus defines the observed state of GPUNode.
func (*GPUNodeStatus) DeepCopy ¶
func (in *GPUNodeStatus) DeepCopy() *GPUNodeStatus
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUNodeStatus.
func (*GPUNodeStatus) DeepCopyInto ¶
func (in *GPUNodeStatus) DeepCopyInto(out *GPUNodeStatus)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type GPUOrCPUResourceUnit ¶
type GPUOrCPUResourceUnit struct { TFlops resource.Quantity `json:"tflops,omitempty"` VRAM resource.Quantity `json:"vram,omitempty"` // CPU/Memory is only available when CloudVendor connection is enabled // +optional CPU resource.Quantity `json:"cpu,omitempty"` // +optional Memory resource.Quantity `json:"memory,omitempty"` }
func (*GPUOrCPUResourceUnit) DeepCopy ¶
func (in *GPUOrCPUResourceUnit) DeepCopy() *GPUOrCPUResourceUnit
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUOrCPUResourceUnit.
func (*GPUOrCPUResourceUnit) DeepCopyInto ¶
func (in *GPUOrCPUResourceUnit) DeepCopyInto(out *GPUOrCPUResourceUnit)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type GPUPool ¶
type GPUPool struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata,omitempty"` Spec GPUPoolSpec `json:"spec,omitempty"` Status GPUPoolStatus `json:"status,omitempty"` }
+kubebuilder:printcolumn:name="Phase",type="string",JSONPath=".status.phase" +kubebuilder:printcolumn:name="TFlops Oversubscription",type="string",JSONPath=".spec.capacityConfig.oversubscription.tflopsOversellRatio" +kubebuilder:printcolumn:name="Mode",type="string",JSONPath=".status.mode" +kubebuilder:printcolumn:name="Default Scheduling Strategy",type="string",JSONPath=".spec.schedulingConfigTemplate" +kubebuilder:printcolumn:name="Total Nodes",type="string",JSONPath=".status.totalNodes" +kubebuilder:printcolumn:name="Total GPU",type="string",JSONPath=".status.totalGPUs" +kubebuilder:printcolumn:name="Total Tflops",type="string",JSONPath=".status.totalTFlops" +kubebuilder:printcolumn:name="Total VRAM",type="string",JSONPath=".status.totalVRAM" +kubebuilder:printcolumn:name="Available Tflops",type="string",JSONPath=".status.availableTFlops" +kubebuilder:printcolumn:name="Available VRAM",type="string",JSONPath=".status.availableVRAM"
func (*GPUPool) DeepCopy ¶
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUPool.
func (*GPUPool) DeepCopyInto ¶
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*GPUPool) DeepCopyObject ¶
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type GPUPoolDefinition ¶
type GPUPoolDefinition struct { Name string `json:"name,omitempty"` // Name of the GPU pool. IsDefault bool `json:"isDefault,omitempty"` SpecTemplate GPUPoolSpec `json:"specTemplate"` }
GPUPoolDefinition defines how to create a GPU pool; the definition could be a URL or inline.
func (*GPUPoolDefinition) DeepCopy ¶
func (in *GPUPoolDefinition) DeepCopy() *GPUPoolDefinition
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUPoolDefinition.
func (*GPUPoolDefinition) DeepCopyInto ¶
func (in *GPUPoolDefinition) DeepCopyInto(out *GPUPoolDefinition)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type GPUPoolList ¶
type GPUPoolList struct { metav1.TypeMeta `json:",inline"` metav1.ListMeta `json:"metadata,omitempty"` Items []GPUPool `json:"items"` }
GPUPoolList contains a list of GPUPool.
func (*GPUPoolList) DeepCopy ¶
func (in *GPUPoolList) DeepCopy() *GPUPoolList
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUPoolList.
func (*GPUPoolList) DeepCopyInto ¶
func (in *GPUPoolList) DeepCopyInto(out *GPUPoolList)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*GPUPoolList) DeepCopyObject ¶
func (in *GPUPoolList) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type GPUPoolSpec ¶
type GPUPoolSpec struct { CapacityConfig *CapacityConfig `json:"capacityConfig,omitempty"` NodeManagerConfig *NodeManagerConfig `json:"nodeManagerConfig,omitempty"` // +optional QosConfig *QosConfig `json:"qosConfig,omitempty"` // +optional ComponentConfig *ComponentConfig `json:"componentConfig,omitempty"` // +optional SchedulingConfigTemplate *string `json:"schedulingConfigTemplate,omitempty"` }
GPUPoolSpec defines the desired state of GPUPool.
func (*GPUPoolSpec) DeepCopy ¶
func (in *GPUPoolSpec) DeepCopy() *GPUPoolSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUPoolSpec.
func (*GPUPoolSpec) DeepCopyInto ¶
func (in *GPUPoolSpec) DeepCopyInto(out *GPUPoolSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type GPUPoolStatus ¶
type GPUPoolStatus struct { Cluster string `json:"cluster,omitempty"` // +kubebuilder:default=Pending Phase TensorFusionPoolPhase `json:"phase"` Conditions []metav1.Condition `json:"conditions,omitempty"` TotalNodes int32 `json:"totalNodes,omitempty"` TotalGPUs int32 `json:"totalGPUs,omitempty"` ReadyNodes int32 `json:"readyNodes"` NotReadyNodes int32 `json:"notReadyNodes"` TotalTFlops resource.Quantity `json:"totalTFlops"` TotalVRAM resource.Quantity `json:"totalVRAM"` VirtualTFlops resource.Quantity `json:"virtualTFlops"` VirtualVRAM resource.Quantity `json:"virtualVRAM"` AvailableTFlops resource.Quantity `json:"availableTFlops"` AvailableVRAM resource.Quantity `json:"availableVRAM"` RunningAppsCnt int32 `json:"runningAppsCnt,omitempty"` // +optional VirtualAvailableTFlops *resource.Quantity `json:"virtualAvailableTFlops,omitempty"` // +optional VirtualAvailableVRAM *resource.Quantity `json:"virtualAvailableVRAM,omitempty"` // when updating any component version or config, pool controller will perform rolling update. // the status will be updated periodically, default to 5s, progress will be 0-100. // when the progress is 100, the component version or config is fully updated. ComponentStatus PoolComponentStatus `json:"componentStatus"` // TODO: calculated every 1h/1d/1w average UtilizedTFlopsPercent string `json:"utilizedTFlopsPercent,omitempty"` UtilizedVRAMPercent string `json:"utilizedVRAMPercent,omitempty"` // TODO: updated with interval AllocatedTFlopsPercent string `json:"allocatedTFlopsPercent,omitempty"` AllocatedVRAMPercent string `json:"allocatedVRAMPercent,omitempty"` // TODO: aggregated with interval SavedCostsPerMonth string `json:"savedCostsPerMonth,omitempty"` PotentialSavingsPerMonth string `json:"potentialSavingsPerMonth,omitempty"` // +kubebuilder:default="" // If the budget is exceeded, the set value in comma separated string to indicate which period caused the exceeding. 
// If this field is not empty, scheduler will not schedule new AI workloads and stop scaling-up check. // TODO not implemented yet BudgetExceeded string `json:"budgetExceeded,omitempty"` // +optional LastCompactionTime *metav1.Time `json:"lastCompactionTime,omitempty"` }
GPUPoolStatus defines the observed state of GPUPool.
func (*GPUPoolStatus) DeepCopy ¶
func (in *GPUPoolStatus) DeepCopy() *GPUPoolStatus
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUPoolStatus.
func (*GPUPoolStatus) DeepCopyInto ¶
func (in *GPUPoolStatus) DeepCopyInto(out *GPUPoolStatus)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type GPUResourceAvailablePercent ¶ added in v1.34.6
type GPUResourceAvailablePercent struct { // +optional RequestsTFlops string `json:"requests.tflops,omitempty"` // +optional RequestsVRAM string `json:"requests.vram,omitempty"` // +optional LimitsTFlops string `json:"limits.tflops,omitempty"` // +optional LimitsVRAM string `json:"limits.vram,omitempty"` // +optional Workers string `json:"workers,omitempty"` }
GPUResourceAvailablePercent defines the available percentage for each resource. Values are strings holding floats rounded to 2 decimal places, to avoid the issue that Kubernetes resources cannot store floats.
func (*GPUResourceAvailablePercent) DeepCopy ¶ added in v1.34.6
func (in *GPUResourceAvailablePercent) DeepCopy() *GPUResourceAvailablePercent
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUResourceAvailablePercent.
func (*GPUResourceAvailablePercent) DeepCopyInto ¶ added in v1.34.6
func (in *GPUResourceAvailablePercent) DeepCopyInto(out *GPUResourceAvailablePercent)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type GPUResourcePricingUnit ¶
type GPUResourcePricingUnit struct { // +kubebuilder:default="$0.0069228" PerFP16TFlopsPerHour string `json:"perFP16TFlopsPerHour,omitempty"` // +kubebuilder:default="$0.01548" PerGBOfVRAMPerHour string `json:"perGBOfVRAMPerHour,omitempty"` }
The default pricing is based on the second-level pricing from https://modal.com/pricing, with Tensor/CUDA Core : HBM = 2:1
func (*GPUResourcePricingUnit) DeepCopy ¶
func (in *GPUResourcePricingUnit) DeepCopy() *GPUResourcePricingUnit
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUResourcePricingUnit.
func (*GPUResourcePricingUnit) DeepCopyInto ¶
func (in *GPUResourcePricingUnit) DeepCopyInto(out *GPUResourcePricingUnit)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type GPUResourceQuota ¶ added in v1.34.6
type GPUResourceQuota struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata,omitempty"` Spec GPUResourceQuotaSpec `json:"spec,omitempty"` Status GPUResourceQuotaStatus `json:"status,omitempty"` }
GPUResourceQuota is the Schema for the gpuresourcequotas API
func (*GPUResourceQuota) DeepCopy ¶ added in v1.34.6
func (in *GPUResourceQuota) DeepCopy() *GPUResourceQuota
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUResourceQuota.
func (*GPUResourceQuota) DeepCopyInto ¶ added in v1.34.6
func (in *GPUResourceQuota) DeepCopyInto(out *GPUResourceQuota)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*GPUResourceQuota) DeepCopyObject ¶ added in v1.34.6
func (in *GPUResourceQuota) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type GPUResourceQuotaConditionType ¶ added in v1.34.6
type GPUResourceQuotaConditionType string
GPUResourceQuotaConditionType defines the condition types for GPUResourceQuota
const ( // GPUResourceQuotaConditionReady indicates the quota is ready and functioning GPUResourceQuotaConditionReady GPUResourceQuotaConditionType = "Ready" // GPUResourceQuotaConditionAlertThresholdReached indicates the alert threshold has been reached GPUResourceQuotaConditionAlertThresholdReached GPUResourceQuotaConditionType = "AlertThresholdReached" )
type GPUResourceQuotaList ¶ added in v1.34.6
type GPUResourceQuotaList struct { metav1.TypeMeta `json:",inline"` metav1.ListMeta `json:"metadata,omitempty"` Items []GPUResourceQuota `json:"items"` }
GPUResourceQuotaList contains a list of GPUResourceQuota
func (*GPUResourceQuotaList) DeepCopy ¶ added in v1.34.6
func (in *GPUResourceQuotaList) DeepCopy() *GPUResourceQuotaList
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUResourceQuotaList.
func (*GPUResourceQuotaList) DeepCopyInto ¶ added in v1.34.6
func (in *GPUResourceQuotaList) DeepCopyInto(out *GPUResourceQuotaList)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*GPUResourceQuotaList) DeepCopyObject ¶ added in v1.34.6
func (in *GPUResourceQuotaList) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type GPUResourceQuotaSingle ¶ added in v1.34.6
type GPUResourceQuotaSingle struct { // Maximum resources per workload // +optional MaxRequests *Resource `json:"maxRequests,omitempty"` // +optional MaxLimits *Resource `json:"maxLimits,omitempty"` // +optional MaxGPUCount *int32 `json:"maxGPUCount,omitempty"` // Default requests applied to workloads without explicit requests // +optional DefaultRequests *Resource `json:"defaultRequests,omitempty"` // Default limits applied to workloads without explicit limits // +optional DefaultLimits *Resource `json:"defaultLimits,omitempty"` }
GPUResourceQuotaSingle defines per-workload limits
func (*GPUResourceQuotaSingle) DeepCopy ¶ added in v1.34.6
func (in *GPUResourceQuotaSingle) DeepCopy() *GPUResourceQuotaSingle
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUResourceQuotaSingle.
func (*GPUResourceQuotaSingle) DeepCopyInto ¶ added in v1.34.6
func (in *GPUResourceQuotaSingle) DeepCopyInto(out *GPUResourceQuotaSingle)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type GPUResourceQuotaSpec ¶ added in v1.34.6
type GPUResourceQuotaSpec struct { // Total namespace limits (similar to ResourceQuotas) Total GPUResourceQuotaTotal `json:"total,omitempty"` // Per-workload limits (similar to LimitRanges) Single GPUResourceQuotaSingle `json:"single,omitempty"` }
GPUResourceQuotaSpec defines the desired state of GPUResourceQuota
func (*GPUResourceQuotaSpec) DeepCopy ¶ added in v1.34.6
func (in *GPUResourceQuotaSpec) DeepCopy() *GPUResourceQuotaSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUResourceQuotaSpec.
func (*GPUResourceQuotaSpec) DeepCopyInto ¶ added in v1.34.6
func (in *GPUResourceQuotaSpec) DeepCopyInto(out *GPUResourceQuotaSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type GPUResourceQuotaStatus ¶ added in v1.34.6
type GPUResourceQuotaStatus struct { // Current resource usage in the namespace Used GPUResourceUsage `json:"used,omitempty"` // Available percentage for each resource type AvailablePercent GPUResourceAvailablePercent `json:"availablePercent,omitempty"` // Conditions represent the latest available observations of the quota's state // +optional Conditions []metav1.Condition `json:"conditions,omitempty"` // LastUpdateTime is the last time the status was updated // +optional LastUpdateTime *metav1.Time `json:"lastUpdateTime,omitempty"` }
GPUResourceQuotaStatus defines the observed state of GPUResourceQuota
func (*GPUResourceQuotaStatus) DeepCopy ¶ added in v1.34.6
func (in *GPUResourceQuotaStatus) DeepCopy() *GPUResourceQuotaStatus
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUResourceQuotaStatus.
func (*GPUResourceQuotaStatus) DeepCopyInto ¶ added in v1.34.6
func (in *GPUResourceQuotaStatus) DeepCopyInto(out *GPUResourceQuotaStatus)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type GPUResourceQuotaTotal ¶ added in v1.34.6
type GPUResourceQuotaTotal struct { // Total requests limits for the namespace // +optional Requests *Resource `json:"requests,omitempty"` // Total limits for the namespace // +optional Limits *Resource `json:"limits,omitempty"` // Maximum number of workers in the namespace // +optional // +kubebuilder:default=32768 MaxWorkers *int32 `json:"maxWorkers,omitempty"` // Alert threshold percentage (0-100) // When usage exceeds this percentage, an alert event will be triggered // +kubebuilder:validation:Minimum=0 // +kubebuilder:validation:Maximum=100 // +kubebuilder:default=95 // +optional AlertThresholdPercent *int32 `json:"alertThresholdPercent,omitempty"` }
GPUResourceQuotaTotal defines total namespace limits
func (*GPUResourceQuotaTotal) DeepCopy ¶ added in v1.34.6
func (in *GPUResourceQuotaTotal) DeepCopy() *GPUResourceQuotaTotal
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUResourceQuotaTotal.
func (*GPUResourceQuotaTotal) DeepCopyInto ¶ added in v1.34.6
func (in *GPUResourceQuotaTotal) DeepCopyInto(out *GPUResourceQuotaTotal)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type GPUResourceUnit ¶
type GPUResourceUnit struct { // Tera floating point operations per second TFlops resource.Quantity `json:"tflops,omitempty"` // VRAM is short for Video memory, namely GPU RAM VRAM resource.Quantity `json:"vram,omitempty"` }
func (*GPUResourceUnit) DeepCopy ¶
func (in *GPUResourceUnit) DeepCopy() *GPUResourceUnit
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUResourceUnit.
func (*GPUResourceUnit) DeepCopyInto ¶
func (in *GPUResourceUnit) DeepCopyInto(out *GPUResourceUnit)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type GPUResourceUsage ¶ added in v1.34.6
type GPUResourceUsage struct { // Current requests usage // +optional Requests Resource `json:"requests,omitempty"` // Current limits usage // +optional Limits Resource `json:"limits,omitempty"` // Current number of workers // +optional Workers int32 `json:"workers,omitempty"` }
GPUResourceUsage defines current resource usage
func (*GPUResourceUsage) DeepCopy ¶ added in v1.34.6
func (in *GPUResourceUsage) DeepCopy() *GPUResourceUsage
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUResourceUsage.
func (*GPUResourceUsage) DeepCopyInto ¶ added in v1.34.6
func (in *GPUResourceUsage) DeepCopyInto(out *GPUResourceUsage)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type GPUStatus ¶
type GPUStatus struct { // +kubebuilder:default=Pending Phase TensorFusionGPUPhase `json:"phase"` Capacity *Resource `json:"capacity"` Available *Resource `json:"available"` UUID string `json:"uuid"` // The host match selector to schedule worker pods NodeSelector map[string]string `json:"nodeSelector"` GPUModel string `json:"gpuModel"` // GPU is used by tensor-fusion or nvidia-operator // This is the key to be compatible with nvidia-device-plugin to avoid resource overlap // Hypervisor will watch kubelet device plugin to report all GPUs already used by nvidia-device-plugin // GPUs will be grouped by usedBy to be used by different Pods, // tensor-fusion annotation or nvidia-device-plugin resource block // +optional UsedBy UsedBySystem `json:"usedBy,omitempty"` Message string `json:"message"` // +optional RunningApps []*RunningAppDetail `json:"runningApps,omitempty"` }
GPUStatus defines the observed state of GPU.
func (*GPUStatus) DeepCopy ¶
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUStatus.
func (*GPUStatus) DeepCopyInto ¶
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type HypervisorConfig ¶
type HypervisorConfig struct { Image string `json:"image,omitempty"` VectorImage string `json:"vectorImage,omitempty"` // +kubebuilder:default=8000 // +kubebuilder:validation:Minimum=0 // +kubebuilder:validation:Maximum=65535 // +optional PortNumber *int32 `json:"portNumber,omitempty"` // +optional PodTemplate *runtime.RawExtension `json:"podTemplate,omitempty"` // +optional EnableVector bool `json:"enableVector,omitempty"` }
func (*HypervisorConfig) DeepCopy ¶
func (in *HypervisorConfig) DeepCopy() *HypervisorConfig
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HypervisorConfig.
func (*HypervisorConfig) DeepCopyInto ¶
func (in *HypervisorConfig) DeepCopyInto(out *HypervisorConfig)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type HypervisorScheduling ¶
type HypervisorScheduling struct { // additional layer to save VRAM, auto-freeze memory and cool down to RAM and Disk // Hypervisor will monitor and trigger freeze of inactive workers, Operator should mark them as scaled-to-zero and release the GPU pool resources, don't scale down CPU client part, so that they can continue to serve the traffic or scale down by other auto-scaling solutions like KEDA/KNative AutoFreezeAndResume AutoFreezeAndResume `json:"autoFreezeAndResume,omitempty"` // Hypervisor will move low priority jobs to pending queue if GPU is full // This config can adjust hypervisor's queueing behavior to balance the co-scheduling CUDA calls MultiProcessQueuing MultiProcessQueuing `json:"multiProcessQueuing,omitempty"` }
func (*HypervisorScheduling) DeepCopy ¶
func (in *HypervisorScheduling) DeepCopy() *HypervisorScheduling
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HypervisorScheduling.
func (*HypervisorScheduling) DeepCopyInto ¶
func (in *HypervisorScheduling) DeepCopyInto(out *HypervisorScheduling)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type MaintenanceWindow ¶
type MaintenanceWindow struct { // crontab syntax. Includes []string `json:"includes,omitempty"` }
func (*MaintenanceWindow) DeepCopy ¶
func (in *MaintenanceWindow) DeepCopy() *MaintenanceWindow
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MaintenanceWindow.
func (*MaintenanceWindow) DeepCopyInto ¶
func (in *MaintenanceWindow) DeepCopyInto(out *MaintenanceWindow)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type MultiProcessQueuing ¶
type MultiProcessQueuing struct { // +optional Enable *bool `json:"enable,omitempty"` Interval string `json:"interval,omitempty"` QueueLevelTimeSlices []string `json:"queueLevelTimeSlices,omitempty"` }
func (*MultiProcessQueuing) DeepCopy ¶
func (in *MultiProcessQueuing) DeepCopy() *MultiProcessQueuing
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MultiProcessQueuing.
func (*MultiProcessQueuing) DeepCopyInto ¶
func (in *MultiProcessQueuing) DeepCopyInto(out *MultiProcessQueuing)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type NameNamespace ¶
type NameNamespace struct { Name string `json:"name,omitempty"` Namespace string `json:"namespace,omitempty"` }
func (*NameNamespace) DeepCopy ¶
func (in *NameNamespace) DeepCopy() *NameNamespace
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NameNamespace.
func (*NameNamespace) DeepCopyInto ¶
func (in *NameNamespace) DeepCopyInto(out *NameNamespace)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (NameNamespace) String ¶ added in v1.35.0
func (n NameNamespace) String() string
type NodeClassBlockDeviceMappings ¶
type NodeClassBlockDeviceMappings struct { // +optional DeviceName string `json:"deviceName,omitempty"` // The device name for the block device EBS NodeClassBlockDeviceSettings `json:"ebs,omitempty"` }
func (*NodeClassBlockDeviceMappings) DeepCopy ¶
func (in *NodeClassBlockDeviceMappings) DeepCopy() *NodeClassBlockDeviceMappings
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeClassBlockDeviceMappings.
func (*NodeClassBlockDeviceMappings) DeepCopyInto ¶
func (in *NodeClassBlockDeviceMappings) DeepCopyInto(out *NodeClassBlockDeviceMappings)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type NodeClassBlockDeviceSettings ¶
type NodeClassBlockDeviceSettings struct { VolumeSize string `json:"volumeSize,omitempty"` // +optional // Default value varies based on the cloud vendor // For AWS it's gp3, for Alicloud it's cloud_essd VolumeType string `json:"volumeType,omitempty"` // +optional // +kubebuilder:default=true DeleteOnTermination bool `json:"deleteOnTermination,omitempty"` // Whether to delete the EBS volume on termination // +optional // +kubebuilder:default=true Encrypted bool `json:"encrypted,omitempty"` // Whether the EBS volume is encrypted }
func (*NodeClassBlockDeviceSettings) DeepCopy ¶
func (in *NodeClassBlockDeviceSettings) DeepCopy() *NodeClassBlockDeviceSettings
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeClassBlockDeviceSettings.
func (*NodeClassBlockDeviceSettings) DeepCopyInto ¶
func (in *NodeClassBlockDeviceSettings) DeepCopyInto(out *NodeClassBlockDeviceSettings)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type NodeClassItemSelectorTerms ¶
type NodeClassItemSelectorTerms struct { // +optional // The item ID ID string `json:"id,omitempty"` // +optional // The item name Name string `json:"name,omitempty"` // +optional // Query by tags Tags map[string]string `json:"tags,omitempty"` }
func (*NodeClassItemSelectorTerms) DeepCopy ¶
func (in *NodeClassItemSelectorTerms) DeepCopy() *NodeClassItemSelectorTerms
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeClassItemSelectorTerms.
func (*NodeClassItemSelectorTerms) DeepCopyInto ¶
func (in *NodeClassItemSelectorTerms) DeepCopyInto(out *NodeClassItemSelectorTerms)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type NodeClassMetadataOptions ¶
type NodeClassMetadataOptions struct { // +optional // +kubebuilder:default=true HttpEndpoint bool `json:"httpEndpoint,omitempty"` // +optional // +kubebuilder:default=false HttpProtocolIPv6 bool `json:"httpProtocolIPv6,omitempty"` // +optional // +kubebuilder:default=1 HttpPutResponseHopLimit int `json:"httpPutResponseHopLimit,omitempty"` // +optional // +kubebuilder:default="required" HttpTokens string `json:"httpTokens,omitempty"` }
AWS IMDSv2 metadata service options
func (*NodeClassMetadataOptions) DeepCopy ¶
func (in *NodeClassMetadataOptions) DeepCopy() *NodeClassMetadataOptions
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeClassMetadataOptions.
func (*NodeClassMetadataOptions) DeepCopyInto ¶
func (in *NodeClassMetadataOptions) DeepCopyInto(out *NodeClassMetadataOptions)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type NodeCompaction ¶
type NodeCompaction struct { // +kubebuilder:default="5m" Period string `json:"period,omitempty"` }
func (*NodeCompaction) DeepCopy ¶
func (in *NodeCompaction) DeepCopy() *NodeCompaction
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeCompaction.
func (*NodeCompaction) DeepCopyInto ¶
func (in *NodeCompaction) DeepCopyInto(out *NodeCompaction)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type NodeDiscoveryConfig ¶
type NodeDiscoveryConfig struct { Image string `json:"image,omitempty"` // +optional PodTemplate *runtime.RawExtension `json:"podTemplate,omitempty"` }
func (*NodeDiscoveryConfig) DeepCopy ¶
func (in *NodeDiscoveryConfig) DeepCopy() *NodeDiscoveryConfig
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeDiscoveryConfig.
func (*NodeDiscoveryConfig) DeepCopyInto ¶
func (in *NodeDiscoveryConfig) DeepCopyInto(out *NodeDiscoveryConfig)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type NodeHypervisorStatus ¶
type NodeHypervisorStatus struct { HypervisorState string `json:"hypervisorState,omitempty"` HypervisorVersion string `json:"hypervisorVersion,omitempty"` LastHeartbeatTime metav1.Time `json:"lastHeartbeatTime,omitempty"` }
func (*NodeHypervisorStatus) DeepCopy ¶
func (in *NodeHypervisorStatus) DeepCopy() *NodeHypervisorStatus
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeHypervisorStatus.
func (*NodeHypervisorStatus) DeepCopyInto ¶
func (in *NodeHypervisorStatus) DeepCopyInto(out *NodeHypervisorStatus)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type NodeManagerConfig ¶
type NodeManagerConfig struct { // +kubebuilder:default="AutoSelect" ProvisioningMode ProvisioningMode `json:"provisioningMode,omitempty"` // +optional NodeProvisioner *NodeProvisioner `json:"nodeProvisioner,omitempty"` // +optional NodeSelector *corev1.NodeSelector `json:"nodeSelector,omitempty"` // +optional NodeCompaction *NodeCompaction `json:"nodeCompaction,omitempty"` // +optional NodePoolRollingUpdatePolicy *NodeRollingUpdatePolicy `json:"nodePoolRollingUpdatePolicy,omitempty"` }
func (*NodeManagerConfig) DeepCopy ¶
func (in *NodeManagerConfig) DeepCopy() *NodeManagerConfig
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeManagerConfig.
func (*NodeManagerConfig) DeepCopyInto ¶
func (in *NodeManagerConfig) DeepCopyInto(out *NodeManagerConfig)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type NodeProvisioner ¶
type NodeProvisioner struct { NodeClass string `json:"nodeClass,omitempty"` // +optional GPURequirements []Requirement `json:"gpuRequirements,omitempty"` // +optional GPUTaints []Taint `json:"gpuTaints,omitempty"` // +optional GPULabels map[string]string `json:"gpuNodeLabels,omitempty"` // +optional CPURequirements []Requirement `json:"cpuRequirements,omitempty"` // +optional CPUTaints []Taint `json:"cpuTaints,omitempty"` // +optional CPULabels map[string]string `json:"cpuNodeLabels,omitempty"` // +optional GPUAnnotation map[string]string `json:"gpuNodeAnnotations,omitempty"` // +optional // NodeProvisioner will start a virtual billing based on public pricing or customized pricing, if the VM's costs exceed any budget constraints, the new VM will not be created, and alerts will be generated Budget *PeriodicalBudget `json:"budget,omitempty"` }
NodeProvisioner and NodeSelector are mutually exclusive. NodeSelector is for existing GPUs; NodeProvisioner is for Karpenter-like auto management.
func (*NodeProvisioner) DeepCopy ¶
func (in *NodeProvisioner) DeepCopy() *NodeProvisioner
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeProvisioner.
func (*NodeProvisioner) DeepCopyInto ¶
func (in *NodeProvisioner) DeepCopyInto(out *NodeProvisioner)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type NodeRequirementKey ¶
type NodeRequirementKey string
+kubebuilder:validation:Enum=node.kubernetes.io/instance-type;kubernetes.io/arch;kubernetes.io/os;topology.kubernetes.io/region;topology.kubernetes.io/zone;karpenter.sh/capacity-type;tensor-fusion.ai/gpu-arch;tensor-fusion.ai/gpu-instance-family;tensor-fusion.ai/gpu-instance-size
const ( NodeRequirementKeyInstanceType NodeRequirementKey = "node.kubernetes.io/instance-type" NodeRequirementKeyArchitecture NodeRequirementKey = "kubernetes.io/arch" NodeRequirementKeyGPUArchitecture NodeRequirementKey = "tensor-fusion.ai/gpu-arch" NodeRequirementKeyOS NodeRequirementKey = "kubernetes.io/os" NodeRequirementKeyRegion NodeRequirementKey = "topology.kubernetes.io/region" NodeRequirementKeyZone NodeRequirementKey = "topology.kubernetes.io/zone" // capacity-type is charging method, can be spot/preemptive or on-demand NodeRequirementKeyCapacityType NodeRequirementKey = "karpenter.sh/capacity-type" NodeRequirementKeyInstanceFamily NodeRequirementKey = "tensor-fusion.ai/gpu-instance-family" NodeRequirementKeyInstanceSize NodeRequirementKey = "tensor-fusion.ai/gpu-instance-size" )
type NodeRollingUpdatePolicy ¶
type NodeRollingUpdatePolicy struct { // +kubebuilder:default=true // +optional AutoUpdate *bool `json:"autoUpdate,omitempty"` // +kubebuilder:default=100 // +kubebuilder:validation:Minimum=0 // +kubebuilder:validation:Maximum=100 BatchPercentage int32 `json:"batchPercentage,omitempty"` // +kubebuilder:default="10m" BatchInterval string `json:"batchInterval,omitempty"` // +optional // +kubebuilder:default="10m" MaxDuration string `json:"maxDuration,omitempty"` // +optional MaintenanceWindow MaintenanceWindow `json:"maintenanceWindow,omitempty"` }
func (*NodeRollingUpdatePolicy) DeepCopy ¶
func (in *NodeRollingUpdatePolicy) DeepCopy() *NodeRollingUpdatePolicy
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeRollingUpdatePolicy.
func (*NodeRollingUpdatePolicy) DeepCopyInto ¶
func (in *NodeRollingUpdatePolicy) DeepCopyInto(out *NodeRollingUpdatePolicy)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type OSImageTypeEnum ¶
type OSImageTypeEnum string
+kubebuilder:validation:Enum=Private;Public;System
const ( OSImageTypePrivate OSImageTypeEnum = "Private" OSImageTypePublic OSImageTypeEnum = "Public" OSImageTypeSystem OSImageTypeEnum = "System" )
type Oversubscription ¶
type Oversubscription struct { // the percentage of host RAM appended to GPU VRAM, defaults to 50% // +optional // +kubebuilder:default=50 // +kubebuilder:validation:Minimum=0 // +kubebuilder:validation:Maximum=100 VRAMExpandToHostMem int32 `json:"vramExpandToHostMem,omitempty"` // the percentage of host disk appended to GPU VRAM, defaults to 70% // +optional // +kubebuilder:default=70 // +kubebuilder:validation:Minimum=0 // +kubebuilder:validation:Maximum=100 VRAMExpandToHostDisk int32 `json:"vramExpandToHostDisk,omitempty"` // the TFlops oversell multiplier as a percentage, defaults to 500%, which means 5 times oversell // +optional // +kubebuilder:default=500 // +kubebuilder:validation:Minimum=100 // +kubebuilder:validation:Maximum=100000 TFlopsOversellRatio int32 `json:"tflopsOversellRatio,omitempty"` }
func (*Oversubscription) DeepCopy ¶
func (in *Oversubscription) DeepCopy() *Oversubscription
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Oversubscription.
func (*Oversubscription) DeepCopyInto ¶
func (in *Oversubscription) DeepCopyInto(out *Oversubscription)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type PeriodicalBudget ¶
type PeriodicalBudget struct { // +kubebuilder:default="100" BudgetPerDay string `json:"budgetPerDay,omitempty"` // +kubebuilder:default="1000" BudgetPerMonth string `json:"budgetPerMonth,omitempty"` // +kubebuilder:default="3000" BudgetPerQuarter string `json:"budgetPerQuarter,omitempty"` // +kubebuilder:default=AlertOnly BudgetExceedStrategy BudgetExceedStrategy `json:"budgetExceedStrategy,omitempty"` }
The budget constraints in dollars
func (*PeriodicalBudget) DeepCopy ¶
func (in *PeriodicalBudget) DeepCopy() *PeriodicalBudget
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PeriodicalBudget.
func (*PeriodicalBudget) DeepCopyInto ¶
func (in *PeriodicalBudget) DeepCopyInto(out *PeriodicalBudget)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type PlacementConfig ¶
type PlacementConfig struct { // +kubebuilder:default=CompactFirst Mode PlacementMode `json:"mode"` // +kubebuilder:default=true // +optional AllowUsingLocalGPU *bool `json:"allowUsingLocalGPU,omitempty"` // If false, workloads will not be scheduled directly to GPU nodes with 'localGPU: true'. // +optional GPUFilters []GPUFilter `json:"gpuFilters,omitempty"` }
func (*PlacementConfig) DeepCopy ¶
func (in *PlacementConfig) DeepCopy() *PlacementConfig
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PlacementConfig.
func (*PlacementConfig) DeepCopyInto ¶
func (in *PlacementConfig) DeepCopyInto(out *PlacementConfig)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type PlacementMode ¶
type PlacementMode string
+kubebuilder:validation:Enum=CompactFirst;LowLoadFirst
const ( // default to compactFirst for cost saving and energy saving PlacementModeCompactFirst PlacementMode = "CompactFirst" // in some cases, use lowLoadFirst for balance and fairness PlacementModeLowLoadFirst PlacementMode = "LowLoadFirst" )
type PoolComponentStatus ¶
type PoolComponentStatus struct { WorkerVersion string `json:"worker,omitempty"` WorkerConfigSynced bool `json:"workerConfigSynced,omitempty"` WorkerUpdateProgress int32 `json:"workerUpdateProgress,omitempty"` HypervisorVersion string `json:"hypervisor,omitempty"` HypervisorConfigSynced bool `json:"hypervisorConfigSynced,omitempty"` HyperVisorUpdateProgress int32 `json:"hypervisorUpdateProgress,omitempty"` ClientVersion string `json:"client,omitempty"` ClientConfigSynced bool `json:"clientConfigSynced,omitempty"` ClientUpdateProgress int32 `json:"clientUpdateProgress,omitempty"` }
func (*PoolComponentStatus) DeepCopy ¶
func (in *PoolComponentStatus) DeepCopy() *PoolComponentStatus
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PoolComponentStatus.
func (*PoolComponentStatus) DeepCopyInto ¶
func (in *PoolComponentStatus) DeepCopyInto(out *PoolComponentStatus)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type PoolProvisioningStatus ¶
type PoolProvisioningStatus struct { InitializingNodes int32 `json:"initializingNodes,omitempty"` TerminatingNodes int32 `json:"terminatingNodes,omitempty"` AvailableNodes int32 `json:"availableNodes,omitempty"` }
func (*PoolProvisioningStatus) DeepCopy ¶
func (in *PoolProvisioningStatus) DeepCopy() *PoolProvisioningStatus
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PoolProvisioningStatus.
func (*PoolProvisioningStatus) DeepCopyInto ¶
func (in *PoolProvisioningStatus) DeepCopyInto(out *PoolProvisioningStatus)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type ProvisioningMode ¶
type ProvisioningMode string
+kubebuilder:validation:Enum=Provisioned;AutoSelect;Karpenter
const ( ProvisioningModeProvisioned ProvisioningMode = "Provisioned" ProvisioningModeAutoSelect ProvisioningMode = "AutoSelect" ProvisioningModeKarpenter ProvisioningMode = "Karpenter" )
type QosConfig ¶
type QosConfig struct { Definitions []QosDefinition `json:"definitions,omitempty"` DefaultQoS QoSLevel `json:"defaultQoS,omitempty"` Pricing []QosPricing `json:"pricing,omitempty"` }
Define different QoS and their price.
func (*QosConfig) DeepCopy ¶
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new QosConfig.
func (*QosConfig) DeepCopyInto ¶
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type QosDefinition ¶
type QosDefinition struct { Name QoSLevel `json:"name,omitempty"` Description string `json:"description,omitempty"` Priority int `json:"priority,omitempty"` // Range from 1-100, reflects the scheduling priority when GPU is full and tasks are in the queue. }
func (*QosDefinition) DeepCopy ¶
func (in *QosDefinition) DeepCopy() *QosDefinition
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new QosDefinition.
func (*QosDefinition) DeepCopyInto ¶
func (in *QosDefinition) DeepCopyInto(out *QosDefinition)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type QosPricing ¶
type QosPricing struct { Qos QoSLevel `json:"qos,omitempty"` Requests GPUResourcePricingUnit `json:"requests,omitempty"` // By default, requests and limitsOverRequests are priced the same, which represents normal on-demand serverless GPU usage; in a low-QoS case such as a hands-on lab, limitsOverRequests should be lower so that users can get burstable GPU resources at very low cost // +kubebuilder:default="1" LimitsOverRequestsChargingRatio string `json:"limitsOverRequests,omitempty"` }
func (*QosPricing) DeepCopy ¶
func (in *QosPricing) DeepCopy() *QosPricing
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new QosPricing.
func (*QosPricing) DeepCopyInto ¶
func (in *QosPricing) DeepCopyInto(out *QosPricing)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type ReBalanceThreshold ¶
type ReBalanceThreshold struct {
MatchAny runtime.RawExtension `json:"matchAny,omitempty"`
}
func (*ReBalanceThreshold) DeepCopy ¶
func (in *ReBalanceThreshold) DeepCopy() *ReBalanceThreshold
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ReBalanceThreshold.
func (*ReBalanceThreshold) DeepCopyInto ¶
func (in *ReBalanceThreshold) DeepCopyInto(out *ReBalanceThreshold)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type ReBalancerConfig ¶
type ReBalancerConfig struct { Enable *bool `json:"enable,omitempty"` Interval string `json:"interval,omitempty"` ReBalanceCoolDownTime string `json:"reBalanceCoolDownTime,omitempty"` Threshold ReBalanceThreshold `json:"threshold,omitempty"` }
Avoid hot GPU devices and continuously balance the workload. Implemented by triggering a simulated scheduling run and advising better GPU nodes to the scheduler.
func (*ReBalancerConfig) DeepCopy ¶
func (in *ReBalancerConfig) DeepCopy() *ReBalancerConfig
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ReBalancerConfig.
func (*ReBalancerConfig) DeepCopyInto ¶
func (in *ReBalancerConfig) DeepCopyInto(out *ReBalancerConfig)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type Requirement ¶
type Requirement struct { Key NodeRequirementKey `json:"key,omitempty"` // +kubebuilder:default="In" // +kubebuilder:validation:Enum=In;Exists;DoesNotExist;Gt;Lt Operator corev1.NodeSelectorOperator `json:"operator,omitempty"` Values []string `json:"values,omitempty"` }
func (*Requirement) DeepCopy ¶
func (in *Requirement) DeepCopy() *Requirement
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Requirement.
func (*Requirement) DeepCopyInto ¶
func (in *Requirement) DeepCopyInto(out *Requirement)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type Resource ¶
type Resource struct { Tflops resource.Quantity `json:"tflops"` Vram resource.Quantity `json:"vram"` }
func (*Resource) DeepCopy ¶
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Resource.
func (*Resource) DeepCopyInto ¶
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type Resources ¶
func (*Resources) DeepCopy ¶
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Resources.
func (*Resources) DeepCopyInto ¶
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type RunningAppDetail ¶ added in v1.33.4
type RunningAppDetail struct { // Workload name namespace Name string `json:"name,omitempty"` Namespace string `json:"namespace,omitempty"` // Worker count Count int `json:"count"` }
func (*RunningAppDetail) DeepCopy ¶ added in v1.33.4
func (in *RunningAppDetail) DeepCopy() *RunningAppDetail
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RunningAppDetail.
func (*RunningAppDetail) DeepCopyInto ¶ added in v1.33.4
func (in *RunningAppDetail) DeepCopyInto(out *RunningAppDetail)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type SchedulingConfigTemplate ¶
type SchedulingConfigTemplate struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata,omitempty"` Spec SchedulingConfigTemplateSpec `json:"spec,omitempty"` Status SchedulingConfigTemplateStatus `json:"status,omitempty"` }
+kubebuilder:object:root=true +kubebuilder:subresource:status +kubebuilder:resource:scope=Cluster +kubebuilder:printcolumn:name="Mode",type="string",JSONPath=".spec.placement.mode" +kubebuilder:printcolumn:name="Allow Local GPU",type="string",JSONPath=".spec.placement.allowUsingLocalGPU" +kubebuilder:printcolumn:name="AutoFreeze",type="string",JSONPath=".spec.hypervisor.autoFreezeAndResume.autoFreeze.enable" SchedulingConfigTemplate is the Schema for the schedulingconfigtemplates API.
func (*SchedulingConfigTemplate) DeepCopy ¶
func (in *SchedulingConfigTemplate) DeepCopy() *SchedulingConfigTemplate
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulingConfigTemplate.
func (*SchedulingConfigTemplate) DeepCopyInto ¶
func (in *SchedulingConfigTemplate) DeepCopyInto(out *SchedulingConfigTemplate)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*SchedulingConfigTemplate) DeepCopyObject ¶
func (in *SchedulingConfigTemplate) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type SchedulingConfigTemplateList ¶
type SchedulingConfigTemplateList struct { metav1.TypeMeta `json:",inline"` metav1.ListMeta `json:"metadata,omitempty"` Items []SchedulingConfigTemplate `json:"items"` }
SchedulingConfigTemplateList contains a list of SchedulingConfigTemplate.
func (*SchedulingConfigTemplateList) DeepCopy ¶
func (in *SchedulingConfigTemplateList) DeepCopy() *SchedulingConfigTemplateList
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulingConfigTemplateList.
func (*SchedulingConfigTemplateList) DeepCopyInto ¶
func (in *SchedulingConfigTemplateList) DeepCopyInto(out *SchedulingConfigTemplateList)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*SchedulingConfigTemplateList) DeepCopyObject ¶
func (in *SchedulingConfigTemplateList) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type SchedulingConfigTemplateSpec ¶
type SchedulingConfigTemplateSpec struct { // place the client or worker to best matched nodes Placement PlacementConfig `json:"placement"` // scale the workload based on the usage and traffic // +optional AutoScaling *AutoScalingConfig `json:"autoScaling,omitempty"` // avoid hot GPU devices and continuously balance the workload // implemented by trigger a simulation scheduling and advise better GPU nodes for scheduler // +optional ReBalancer *ReBalancerConfig `json:"reBalancer,omitempty"` // single GPU device multi-process queuing and fair scheduling with QoS constraint // +optional Hypervisor *HypervisorScheduling `json:"hypervisor,omitempty"` }
Place workloads on the right nodes and scale them smartly.
func (*SchedulingConfigTemplateSpec) DeepCopy ¶
func (in *SchedulingConfigTemplateSpec) DeepCopy() *SchedulingConfigTemplateSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulingConfigTemplateSpec.
func (*SchedulingConfigTemplateSpec) DeepCopyInto ¶
func (in *SchedulingConfigTemplateSpec) DeepCopyInto(out *SchedulingConfigTemplateSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type SchedulingConfigTemplateStatus ¶
type SchedulingConfigTemplateStatus struct { }
SchedulingConfigTemplateStatus defines the observed state of SchedulingConfigTemplate.
func (*SchedulingConfigTemplateStatus) DeepCopy ¶
func (in *SchedulingConfigTemplateStatus) DeepCopy() *SchedulingConfigTemplateStatus
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulingConfigTemplateStatus.
func (*SchedulingConfigTemplateStatus) DeepCopyInto ¶
func (in *SchedulingConfigTemplateStatus) DeepCopyInto(out *SchedulingConfigTemplateStatus)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type SmartSchedulerModelInput ¶
type SmartSchedulerModelInput struct { Enable *bool `json:"enable,omitempty"` Model string `json:"model,omitempty"` HistoryDataPeriod string `json:"historyDataPeriod,omitempty"` PredictionPeriod string `json:"predictionPeriod,omitempty"` }
func (*SmartSchedulerModelInput) DeepCopy ¶
func (in *SmartSchedulerModelInput) DeepCopy() *SmartSchedulerModelInput
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SmartSchedulerModelInput.
func (*SmartSchedulerModelInput) DeepCopyInto ¶
func (in *SmartSchedulerModelInput) DeepCopyInto(out *SmartSchedulerModelInput)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type Taint ¶
type Taint struct { // +kubebuilder:default=NoSchedule // +kubebuilder:validation:Enum=NoSchedule;NoExecute;PreferNoSchedule Effect corev1.TaintEffect `json:"effect,omitempty"` Key string `json:"key,omitempty"` Value string `json:"value,omitempty"` }
func (*Taint) DeepCopy ¶
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Taint.
func (*Taint) DeepCopyInto ¶
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type TensorFusionCluster ¶
type TensorFusionCluster struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata,omitempty"` Spec TensorFusionClusterSpec `json:"spec,omitempty"` Status TensorFusionClusterStatus `json:"status,omitempty"` }
+kubebuilder:printcolumn:name="Total Tflops",type="string",JSONPath=".status.totalTFlops" +kubebuilder:printcolumn:name="Total VRAM",type="string",JSONPath=".status.totalVRAM" +kubebuilder:printcolumn:name="Available Tflops",type="string",JSONPath=".status.availableTFlops" +kubebuilder:printcolumn:name="Available VRAM",type="string",JSONPath=".status.availableVRAM" TensorFusionCluster is the Schema for the tensorfusionclusters API.
func (*TensorFusionCluster) DeepCopy ¶
func (in *TensorFusionCluster) DeepCopy() *TensorFusionCluster
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TensorFusionCluster.
func (*TensorFusionCluster) DeepCopyInto ¶
func (in *TensorFusionCluster) DeepCopyInto(out *TensorFusionCluster)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*TensorFusionCluster) DeepCopyObject ¶
func (in *TensorFusionCluster) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
func (*TensorFusionCluster) RefreshStatus ¶
func (tfc *TensorFusionCluster) RefreshStatus(ownedPools []GPUPool)
func (*TensorFusionCluster) SetAsPending ¶
func (tfc *TensorFusionCluster) SetAsPending()
func (*TensorFusionCluster) SetAsReady ¶
func (tfc *TensorFusionCluster) SetAsReady(conditions ...metav1.Condition) bool
func (*TensorFusionCluster) SetAsUnknown ¶
func (tfc *TensorFusionCluster) SetAsUnknown(err error) bool
func (*TensorFusionCluster) SetAsUpdating ¶
func (tfc *TensorFusionCluster) SetAsUpdating(conditions ...metav1.Condition) bool
type TensorFusionClusterList ¶
type TensorFusionClusterList struct { metav1.TypeMeta `json:",inline"` metav1.ListMeta `json:"metadata,omitempty"` Items []TensorFusionCluster `json:"items"` }
TensorFusionClusterList contains a list of TensorFusionCluster.
func (*TensorFusionClusterList) DeepCopy ¶
func (in *TensorFusionClusterList) DeepCopy() *TensorFusionClusterList
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TensorFusionClusterList.
func (*TensorFusionClusterList) DeepCopyInto ¶
func (in *TensorFusionClusterList) DeepCopyInto(out *TensorFusionClusterList)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*TensorFusionClusterList) DeepCopyObject ¶
func (in *TensorFusionClusterList) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type TensorFusionClusterPhase ¶
type TensorFusionClusterPhase string
+kubebuilder:validation:Enum=Pending;Running;Updating;Destroying;Unknown TensorFusionClusterPhase represents the phase of the TensorFusionCluster resource.
type TensorFusionClusterSpec ¶
type TensorFusionClusterSpec struct { GPUPools []GPUPoolDefinition `json:"gpuPools,omitempty"` // +optional ComputingVendor *ComputingVendorConfig `json:"computingVendor,omitempty"` }
TensorFusionClusterSpec defines the desired state of TensorFusionCluster.
func (*TensorFusionClusterSpec) DeepCopy ¶
func (in *TensorFusionClusterSpec) DeepCopy() *TensorFusionClusterSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TensorFusionClusterSpec.
func (*TensorFusionClusterSpec) DeepCopyInto ¶
func (in *TensorFusionClusterSpec) DeepCopyInto(out *TensorFusionClusterSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type TensorFusionClusterStatus ¶
type TensorFusionClusterStatus struct { // +kubebuilder:default=Pending Phase TensorFusionClusterPhase `json:"phase,omitempty"` Conditions []metav1.Condition `json:"conditions,omitempty"` TotalPools int32 `json:"totalPools"` TotalNodes int32 `json:"totalNodes"` TotalGPUs int32 `json:"totalGPUs"` TotalTFlops resource.Quantity `json:"totalTFlops"` TotalVRAM resource.Quantity `json:"totalVRAM"` VirtualTFlops resource.Quantity `json:"virtualTFlops"` VirtualVRAM resource.Quantity `json:"virtualVRAM"` AvailableTFlops resource.Quantity `json:"availableTFlops"` AvailableVRAM resource.Quantity `json:"availableVRAM"` // +optional VirtualAvailableTFlops *resource.Quantity `json:"virtualAvailableTFlops,omitempty"` // +optional VirtualAvailableVRAM *resource.Quantity `json:"virtualAvailableVRAM,omitempty"` // +optional ReadyGPUPools []string `json:"readyGPUPools"` // +optional NotReadyGPUPools []string `json:"notReadyGPUPools"` // +kubebuilder:default=0 // RetryCount int64 `json:"retryCount"` // TODO: calculated every 1h/1d/1w average UtilizedTFlopsPercent string `json:"utilizedTFlopsPercent,omitempty"` UtilizedVRAMPercent string `json:"utilizedVRAMPercent,omitempty"` // TODO: updated with interval AllocatedTFlopsPercent string `json:"allocatedTFlopsPercent,omitempty"` AllocatedVRAMPercent string `json:"allocatedVRAMPercent,omitempty"` // TODO: aggregated with interval SavedCostsPerMonth string `json:"savedCostsPerMonth,omitempty"` PotentialSavingsPerMonth string `json:"potentialSavingsPerMonth,omitempty"` CloudVendorConfigHash string `json:"cloudVendorConfigHash,omitempty"` }
TensorFusionClusterStatus defines the observed state of TensorFusionCluster.
func (*TensorFusionClusterStatus) DeepCopy ¶
func (in *TensorFusionClusterStatus) DeepCopy() *TensorFusionClusterStatus
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TensorFusionClusterStatus.
func (*TensorFusionClusterStatus) DeepCopyInto ¶
func (in *TensorFusionClusterStatus) DeepCopyInto(out *TensorFusionClusterStatus)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type TensorFusionConnection ¶
type TensorFusionConnection struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata,omitempty"` Spec TensorFusionConnectionSpec `json:"spec,omitempty"` Status TensorFusionConnectionStatus `json:"status,omitempty"` }
TensorFusionConnection is the Schema for the tensorfusionconnections API.
func (*TensorFusionConnection) DeepCopy ¶
func (in *TensorFusionConnection) DeepCopy() *TensorFusionConnection
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TensorFusionConnection.
func (*TensorFusionConnection) DeepCopyInto ¶
func (in *TensorFusionConnection) DeepCopyInto(out *TensorFusionConnection)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*TensorFusionConnection) DeepCopyObject ¶
func (in *TensorFusionConnection) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type TensorFusionConnectionList ¶
type TensorFusionConnectionList struct { metav1.TypeMeta `json:",inline"` metav1.ListMeta `json:"metadata,omitempty"` Items []TensorFusionConnection `json:"items"` }
TensorFusionConnectionList contains a list of TensorFusionConnection.
func (*TensorFusionConnectionList) DeepCopy ¶
func (in *TensorFusionConnectionList) DeepCopy() *TensorFusionConnectionList
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TensorFusionConnectionList.
func (*TensorFusionConnectionList) DeepCopyInto ¶
func (in *TensorFusionConnectionList) DeepCopyInto(out *TensorFusionConnectionList)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*TensorFusionConnectionList) DeepCopyObject ¶
func (in *TensorFusionConnectionList) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type TensorFusionConnectionSpec ¶
type TensorFusionConnectionSpec struct { WorkloadName string `json:"workloadName"` ClientPod string `json:"clientPod"` }
TensorFusionConnectionSpec defines the desired state of TensorFusionConnection.
func (*TensorFusionConnectionSpec) DeepCopy ¶
func (in *TensorFusionConnectionSpec) DeepCopy() *TensorFusionConnectionSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TensorFusionConnectionSpec.
func (*TensorFusionConnectionSpec) DeepCopyInto ¶
func (in *TensorFusionConnectionSpec) DeepCopyInto(out *TensorFusionConnectionSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type TensorFusionConnectionStatus ¶
type TensorFusionConnectionStatus struct { Phase WorkerPhase `json:"phase"` ConnectionURL string `json:"connectionURL"` WorkerName string `json:"workerName"` }
TensorFusionConnectionStatus defines the observed state of TensorFusionConnection.
func (*TensorFusionConnectionStatus) DeepCopy ¶
func (in *TensorFusionConnectionStatus) DeepCopy() *TensorFusionConnectionStatus
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TensorFusionConnectionStatus.
func (*TensorFusionConnectionStatus) DeepCopyInto ¶
func (in *TensorFusionConnectionStatus) DeepCopyInto(out *TensorFusionConnectionStatus)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type TensorFusionGPUNodePhase ¶
type TensorFusionGPUNodePhase string
+kubebuilder:validation:Enum=Pending;Provisioning;Migrating;Running;Succeeded;Failed;Unknown;Destroying
const ( TensorFusionGPUNodePhasePending TensorFusionGPUNodePhase = constants.PhasePending TensorFusionGPUNodePhaseMigrating TensorFusionGPUNodePhase = constants.PhaseMigrating TensorFusionGPUNodePhaseRunning TensorFusionGPUNodePhase = constants.PhaseRunning TensorFusionGPUNodePhaseSucceeded TensorFusionGPUNodePhase = constants.PhaseSucceeded TensorFusionGPUNodePhaseFailed TensorFusionGPUNodePhase = constants.PhaseFailed TensorFusionGPUNodePhaseUnknown TensorFusionGPUNodePhase = constants.PhaseUnknown TensorFusionGPUNodePhaseDestroying TensorFusionGPUNodePhase = constants.PhaseDestroying )
type TensorFusionGPUPhase ¶
type TensorFusionGPUPhase string
+kubebuilder:validation:Enum=Pending;Provisioning;Running;Unknown;Destroying;Migrating
const ( TensorFusionGPUPhasePending TensorFusionGPUPhase = constants.PhasePending TensorFusionGPUPhaseUpdating TensorFusionGPUPhase = constants.PhaseUpdating TensorFusionGPUPhaseRunning TensorFusionGPUPhase = constants.PhaseRunning TensorFusionGPUPhaseUnknown TensorFusionGPUPhase = constants.PhaseUnknown TensorFusionGPUPhaseDestroying TensorFusionGPUPhase = constants.PhaseDestroying TensorFusionGPUPhaseMigrating TensorFusionGPUPhase = constants.PhaseMigrating )
type TensorFusionPoolPhase ¶
type TensorFusionPoolPhase string
+kubebuilder:validation:Enum=Pending;Running;Updating;Destroying;Unknown
type TensorFusionWorkload ¶
type TensorFusionWorkload struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata,omitempty"` Spec WorkloadProfileSpec `json:"spec,omitempty"` Status TensorFusionWorkloadStatus `json:"status,omitempty"` }
TensorFusionWorkload is the Schema for the tensorfusionworkloads API.
func (*TensorFusionWorkload) DeepCopy ¶
func (in *TensorFusionWorkload) DeepCopy() *TensorFusionWorkload
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TensorFusionWorkload.
func (*TensorFusionWorkload) DeepCopyInto ¶
func (in *TensorFusionWorkload) DeepCopyInto(out *TensorFusionWorkload)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*TensorFusionWorkload) DeepCopyObject ¶
func (in *TensorFusionWorkload) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type TensorFusionWorkloadList ¶
type TensorFusionWorkloadList struct { metav1.TypeMeta `json:",inline"` metav1.ListMeta `json:"metadata,omitempty"` Items []TensorFusionWorkload `json:"items"` }
TensorFusionWorkloadList contains a list of TensorFusionWorkload.
func (*TensorFusionWorkloadList) DeepCopy ¶
func (in *TensorFusionWorkloadList) DeepCopy() *TensorFusionWorkloadList
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TensorFusionWorkloadList.
func (*TensorFusionWorkloadList) DeepCopyInto ¶
func (in *TensorFusionWorkloadList) DeepCopyInto(out *TensorFusionWorkloadList)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*TensorFusionWorkloadList) DeepCopyObject ¶
func (in *TensorFusionWorkloadList) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type TensorFusionWorkloadPhase ¶ added in v1.32.0
type TensorFusionWorkloadPhase string
+kubebuilder:validation:Enum=Pending;Running;Failed;Unknown
const ( TensorFusionWorkloadPhasePending TensorFusionWorkloadPhase = "Pending" TensorFusionWorkloadPhaseRunning TensorFusionWorkloadPhase = "Running" TensorFusionWorkloadPhaseFailed TensorFusionWorkloadPhase = "Failed" )
type TensorFusionWorkloadStatus ¶
type TensorFusionWorkloadStatus struct { // +kubebuilder:default=Pending Phase TensorFusionWorkloadPhase `json:"phase,omitempty"` // Represents the latest available observations of the workload's current state. // +optional Conditions []metav1.Condition `json:"conditions,omitempty"` // workerCount is the number of vGPU workers WorkerCount int32 `json:"workerCount"` // readyWorkers is the number of vGPU workers that are ready ReadyWorkers int32 `json:"readyWorkers,omitempty"` // Hash of the pod template used to create worker pods PodTemplateHash string `json:"podTemplateHash,omitempty"` }
TensorFusionWorkloadStatus defines the observed state of TensorFusionWorkload.
func (*TensorFusionWorkloadStatus) DeepCopy ¶
func (in *TensorFusionWorkloadStatus) DeepCopy() *TensorFusionWorkloadStatus
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TensorFusionWorkloadStatus.
func (*TensorFusionWorkloadStatus) DeepCopyInto ¶
func (in *TensorFusionWorkloadStatus) DeepCopyInto(out *TensorFusionWorkloadStatus)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type UsedBySystem ¶ added in v1.39.0
type UsedBySystem string
+kubebuilder:validation:Enum=tensor-fusion;nvidia-device-plugin +default="tensor-fusion"
const ( UsedByTensorFusion UsedBySystem = "tensor-fusion" UsedByNvidiaDevicePlugin UsedBySystem = "nvidia-device-plugin" )
type WorkerConfig ¶
type WorkerConfig struct { Image string `json:"image,omitempty"` // +optional PodTemplate *runtime.RawExtension `json:"podTemplate,omitempty"` }
func (*WorkerConfig) DeepCopy ¶
func (in *WorkerConfig) DeepCopy() *WorkerConfig
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new WorkerConfig.
func (*WorkerConfig) DeepCopyInto ¶
func (in *WorkerConfig) DeepCopyInto(out *WorkerConfig)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type WorkerPhase ¶
type WorkerPhase string
const ( WorkerPending WorkerPhase = "Pending" WorkerRunning WorkerPhase = "Running" WorkerFailed WorkerPhase = "Failed" )
type WorkerStatus ¶
type WorkerStatus struct { WorkerPhase WorkerPhase `json:"workerPhase"` WorkerName string `json:"workerName"` NodeSelector map[string]string `json:"nodeSelector,omitempty"` // +optional WorkerIp string `json:"workerIp,omitempty"` // +optional ResourceVersion string `json:"resourceVersion,omitempty"` }
func (*WorkerStatus) DeepCopy ¶
func (in *WorkerStatus) DeepCopy() *WorkerStatus
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new WorkerStatus.
func (*WorkerStatus) DeepCopyInto ¶
func (in *WorkerStatus) DeepCopyInto(out *WorkerStatus)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type WorkloadProfile ¶ added in v1.23.7
type WorkloadProfile struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata,omitempty"` Spec WorkloadProfileSpec `json:"spec,omitempty"` Status WorkloadProfileStatus `json:"status,omitempty"` }
WorkloadProfile is the Schema for the workloadprofiles API.
func (*WorkloadProfile) DeepCopy ¶ added in v1.23.7
func (in *WorkloadProfile) DeepCopy() *WorkloadProfile
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new WorkloadProfile.
func (*WorkloadProfile) DeepCopyInto ¶ added in v1.23.7
func (in *WorkloadProfile) DeepCopyInto(out *WorkloadProfile)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*WorkloadProfile) DeepCopyObject ¶ added in v1.23.7
func (in *WorkloadProfile) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type WorkloadProfileList ¶ added in v1.23.7
type WorkloadProfileList struct { metav1.TypeMeta `json:",inline"` metav1.ListMeta `json:"metadata,omitempty"` Items []WorkloadProfile `json:"items"` }
WorkloadProfileList contains a list of WorkloadProfile.
func (*WorkloadProfileList) DeepCopy ¶ added in v1.23.7
func (in *WorkloadProfileList) DeepCopy() *WorkloadProfileList
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new WorkloadProfileList.
func (*WorkloadProfileList) DeepCopyInto ¶ added in v1.23.7
func (in *WorkloadProfileList) DeepCopyInto(out *WorkloadProfileList)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*WorkloadProfileList) DeepCopyObject ¶ added in v1.23.7
func (in *WorkloadProfileList) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type WorkloadProfileSpec ¶ added in v1.23.7
type WorkloadProfileSpec struct { // +optional // If replicas is not set, it will be dynamic based on pending Pods // If isLocalGPU is set to true, replicas must be dynamic, and this field will be ignored Replicas *int32 `json:"replicas,omitempty"` // +optional PoolName string `json:"poolName,omitempty"` // +optional Resources Resources `json:"resources"` // +optional // Qos defines the quality of service level for the client. Qos QoSLevel `json:"qos,omitempty"` // +optional // Schedule the workload to the same GPU server that runs the vGPU worker for best performance, defaults to false IsLocalGPU bool `json:"isLocalGPU,omitempty"` // +optional // GPUModel specifies the required GPU model (e.g., "A100", "H100") GPUModel string `json:"gpuModel,omitempty"` // The number of GPUs to be used by the workload, defaults to 1 GPUCount uint32 `json:"gpuCount,omitempty"` // +optional // AutoScalingConfig configured here will override the Pool's schedulingConfig // This field cannot be fully expressed in annotations; if the user wants to enable auto-scaling via annotations, // the user can set tensor-fusion.ai/auto-limits|requests|replicas: 'true' AutoScalingConfig AutoScalingConfig `json:"autoScalingConfig,omitempty"` // +optional // NodeAffinity specifies the node affinity requirements for the workload NodeAffinity *v1.NodeAffinity `json:"nodeAffinity,omitempty"` }
WorkloadProfileSpec defines the desired state of WorkloadProfile.
func (*WorkloadProfileSpec) DeepCopy ¶ added in v1.23.7
func (in *WorkloadProfileSpec) DeepCopy() *WorkloadProfileSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new WorkloadProfileSpec.
func (*WorkloadProfileSpec) DeepCopyInto ¶ added in v1.23.7
func (in *WorkloadProfileSpec) DeepCopyInto(out *WorkloadProfileSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (WorkloadProfileSpec) IsDynamicReplica ¶ added in v1.35.0
func (t WorkloadProfileSpec) IsDynamicReplica() bool
type WorkloadProfileStatus ¶ added in v1.23.7
type WorkloadProfileStatus struct { }
WorkloadProfileStatus defines the observed state of WorkloadProfile.
func (*WorkloadProfileStatus) DeepCopy ¶ added in v1.23.7
func (in *WorkloadProfileStatus) DeepCopy() *WorkloadProfileStatus
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new WorkloadProfileStatus.
func (*WorkloadProfileStatus) DeepCopyInto ¶ added in v1.23.7
func (in *WorkloadProfileStatus) DeepCopyInto(out *WorkloadProfileStatus)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
Source Files
¶
- base_types.go
- gpu_types.go
- gpunode_funcs.go
- gpunode_types.go
- gpunodeclass_types.go
- gpupool_types.go
- gpuresourcequota_types.go
- groupversion_info.go
- schedulingconfigtemplate_types.go
- tensorfusioncluster_funcs.go
- tensorfusioncluster_types.go
- tensorfusionconnection_types.go
- tensorfusionworkload_types.go
- workloadprofile_types.go
- zz_generated.deepcopy.go