Documentation
¶
Overview ¶
Package v1 contains API Schema definitions for the tensor-fusion.ai v1 API group. +kubebuilder:object:generate=true +groupName=tensor-fusion.ai
Index ¶
- Constants
- Variables
- type AlertConfig
- type AuthTypeEnum
- type AutoFreeze
- type AutoScalingConfig
- type AutoSetLimits
- type AutoSetReplicas
- type AutoSetRequests
- type BudgetExceedStrategy
- type CapacityConfig
- type ClientConfig
- type ComponentConfig
- type ComputingVendorConfig
- type ComputingVendorName
- type ComputingVendorParams
- type DataPipeline4ResourcesConfig
- type DataPipeline4TimeSeriesConfig
- type DataPipelineResultRemoteWriteConfig
- type DataPipelinesConfig
- type GPU
- type GPUFilter
- type GPUList
- type GPUNode
- type GPUNodeAllocationDetails
- type GPUNodeClass
- type GPUNodeClassList
- type GPUNodeClassSpec
- type GPUNodeClassStatus
- type GPUNodeInfo
- type GPUNodeList
- type GPUNodeManageMode
- type GPUNodeSpec
- type GPUNodeStatus
- type GPUOrCPUResourceUnit
- type GPUPool
- type GPUPoolDefinition
- type GPUPoolList
- type GPUPoolSpec
- type GPUPoolStatus
- type GPUResourcePricingUnit
- type GPUResourceUnit
- type GPUStatus
- type HypervisorConfig
- type HypervisorScheduling
- type MaintenanceWindow
- type MonitorConfig
- type MultiProcessQueuing
- type NameNamespace
- type NodeClassBlockDeviceMappings
- type NodeClassBlockDeviceSettings
- type NodeClassItemSelectorTerms
- type NodeClassMetadataOptions
- type NodeCompaction
- type NodeDiscoveryConfig
- type NodeHypervisorStatus
- type NodeManagerConfig
- type NodeProvisioner
- type NodeProvisionerMode
- type NodeRequirementKey
- type NodeRollingUpdatePolicy
- type OSImageTypeEnum
- type ObservabilityConfig
- type Oversubscription
- type PeriodicalBudget
- type PlacementConfig
- type PlacementMode
- type PoolComponentStatus
- type PoolProvisioningStatus
- type ProvisioningMode
- type QoSLevel
- type QosConfig
- type QosDefinition
- type QosPricing
- type ReBalanceThreshold
- type ReBalancerConfig
- type RemoteWriteConfig
- type Requirement
- type Resource
- type Resources
- type ScaleToZero
- type SchedulingConfigTemplate
- type SchedulingConfigTemplateList
- type SchedulingConfigTemplateSpec
- type SchedulingConfigTemplateStatus
- type SmartSchedulerModelInput
- type StorageVendorConfig
- type Taint
- type TensorFusionCluster
- func (in *TensorFusionCluster) DeepCopy() *TensorFusionCluster
- func (in *TensorFusionCluster) DeepCopyInto(out *TensorFusionCluster)
- func (in *TensorFusionCluster) DeepCopyObject() runtime.Object
- func (tfc *TensorFusionCluster) RefreshStatus(ownedPools []GPUPool)
- func (tfc *TensorFusionCluster) SetAsPending()
- func (tfc *TensorFusionCluster) SetAsReady(conditions ...metav1.Condition) bool
- func (tfc *TensorFusionCluster) SetAsUnknown(err error) bool
- func (tfc *TensorFusionCluster) SetAsUpdating(conditions ...metav1.Condition) bool
- type TensorFusionClusterList
- type TensorFusionClusterPhase
- type TensorFusionClusterSpec
- type TensorFusionClusterStatus
- type TensorFusionConnection
- type TensorFusionConnectionList
- type TensorFusionConnectionSpec
- type TensorFusionConnectionStatus
- type TensorFusionGPUNodePhase
- type TensorFusionGPUPhase
- type TensorFusionPoolPhase
- type TensorFusionWorkload
- type TensorFusionWorkloadList
- type TensorFusionWorkloadStatus
- type WorkerConfig
- type WorkerPhase
- type WorkerStatus
- type WorkloadProfile
- type WorkloadProfileList
- type WorkloadProfileSpec
- type WorkloadProfileStatus
Constants ¶
const ( TensorFusionPoolPhasePending = TensorFusionPoolPhase(constants.PhasePending) TensorFusionPoolPhaseRunning = TensorFusionPoolPhase(constants.PhaseRunning) TensorFusionPoolPhaseUpdating = TensorFusionPoolPhase(constants.PhaseUpdating) TensorFusionPoolPhaseUnknown = TensorFusionPoolPhase(constants.PhaseUnknown) TensorFusionPoolPhaseDestroying = TensorFusionPoolPhase(constants.PhaseDestroying) )
const ( TensorFusionClusterPending = TensorFusionClusterPhase(constants.PhasePending) TensorFusionClusterRunning = TensorFusionClusterPhase(constants.PhaseRunning) TensorFusionClusterUpdating = TensorFusionClusterPhase(constants.PhaseUpdating) TensorFusionClusterDestroying = TensorFusionClusterPhase(constants.PhaseDestroying) TensorFusionClusterUnknown = TensorFusionClusterPhase(constants.PhaseUnknown) )
Variables ¶
var ( // GroupVersion is group version used to register these objects. GroupVersion = schema.GroupVersion{Group: "tensor-fusion.ai", Version: "v1"} // SchemeBuilder is used to add go types to the GroupVersionKind scheme. SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion} // AddToScheme adds the types in this group-version to the given scheme. AddToScheme = SchemeBuilder.AddToScheme )
Functions ¶
This section is empty.
Types ¶
type AlertConfig ¶
type AlertConfig struct { // +optional Expression *runtime.RawExtension `json:"expression,omitempty"` }
func (*AlertConfig) DeepCopy ¶
func (in *AlertConfig) DeepCopy() *AlertConfig
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AlertConfig.
func (*AlertConfig) DeepCopyInto ¶
func (in *AlertConfig) DeepCopyInto(out *AlertConfig)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type AuthTypeEnum ¶
type AuthTypeEnum string
+kubebuilder:validation:Enum=accessKey;serviceAccountRole
const ( AuthTypeAccessKey AuthTypeEnum = "accessKey" AuthTypeServiceAccountRole AuthTypeEnum = "serviceAccountRole" )
type AutoFreeze ¶
type AutoFreeze struct { Qos QoSLevel `json:"qos,omitempty"` FreezeToMemTTL string `json:"freezeToMemTTL,omitempty"` FreezeToDiskTTL string `json:"freezeToDiskTTL,omitempty"` Enable *bool `json:"enable,omitempty"` }
func (*AutoFreeze) DeepCopy ¶
func (in *AutoFreeze) DeepCopy() *AutoFreeze
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AutoFreeze.
func (*AutoFreeze) DeepCopyInto ¶
func (in *AutoFreeze) DeepCopyInto(out *AutoFreeze)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type AutoScalingConfig ¶
type AutoScalingConfig struct { // layer 1 vertical auto-scaling, turbo burst to existing GPU cards quickly // VPA-like, aggregate metrics data <1m AutoSetLimits AutoSetLimits `json:"autoSetLimits,omitempty"` // layer 2 horizontal auto-scaling, scale up to more GPU cards if max limits threshold hit // HPA-like, aggregate metrics data 1m-1h (when tf-worker scaled-up, should also trigger client pod's owner[Deployment etc.]'s replica increasing, check if KNative works) AutoSetReplicas AutoSetReplicas `json:"autoSetReplicas,omitempty"` // layer 3 adjusting, to match the actual usage in the long run // Adjust baseline requests to match the actual usage in longer period, such as 1day - 2weeks AutoSetRequests AutoSetRequests `json:"autoSetRequests,omitempty"` // additional layer to save VRAM, auto-freeze memory and cool down to RAM and Disk // Hypervisor will monitor and trigger freeze of inactive workers, Operator should mark them as scaled-to-zero and release the GPU pool resources, don't scale down CPU client part, so that they can continue to serve the traffic or scale down by other auto-scaling solutions like KEDA/KNative ScaleToZero ScaleToZero `json:"scaleToZero,omitempty"` }
func (*AutoScalingConfig) DeepCopy ¶
func (in *AutoScalingConfig) DeepCopy() *AutoScalingConfig
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AutoScalingConfig.
func (*AutoScalingConfig) DeepCopyInto ¶
func (in *AutoScalingConfig) DeepCopyInto(out *AutoScalingConfig)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type AutoSetLimits ¶
type AutoSetLimits struct { Enable bool `json:"enable,omitempty"` // target resource to scale limits, such as "tflops", "vram", or "all" by default TargetResource string `json:"targetResource,omitempty"` EvaluationPeriod string `json:"evaluationPeriod,omitempty"` ExtraTFlopsBufferRatio string `json:"extraTFlopsBufferRatio,omitempty"` IgnoredDeltaRange string `json:"ignoredDeltaRange,omitempty"` ScaleUpStep string `json:"scaleUpStep,omitempty"` // the multiplier of requests, to avoid limit set too high, like 5.0 MaxRatioToRequests string `json:"maxRatioToRequests,omitempty"` Prediction *SmartSchedulerModelInput `json:"prediction,omitempty"` }
A typical autoLimits algorithm could be: check every 5m, look back at 1 day of data, select 99% of actual usage as preferredLimits, and calculate finalPreferredLimits as preferredLimits*(1+extraBufferRatio). If finalPreferredLimits and the current limits are equal to each other within a range (e.g. 5%), do nothing. If finalPreferredLimits is less than the current limits and the difference exceeds the error range, set the current limits to finalPreferredLimits. If finalPreferredLimits > current limits and the difference exceeds the error range, set the current limits to max(finalPreferredLimits, current limits * scaleUpStep). If AI prediction is enabled, it helps to detect historical patterns and set a more reasonable, explainable limit value; in that case the final limits should be max(finalPreferredLimits, last(predict_value * (1 + extraTFlopsBufferRatio))).
func (*AutoSetLimits) DeepCopy ¶
func (in *AutoSetLimits) DeepCopy() *AutoSetLimits
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AutoSetLimits.
func (*AutoSetLimits) DeepCopyInto ¶
func (in *AutoSetLimits) DeepCopyInto(out *AutoSetLimits)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type AutoSetReplicas ¶
type AutoSetReplicas struct { Enable bool `json:"enable,omitempty"` TargetTFlopsOfLimits string `json:"targetTFlopsOfLimits,omitempty"` EvaluationPeriod string `json:"evaluationPeriod,omitempty"` ScaleUpStep string `json:"scaleUpStep,omitempty"` ScaleDownStep string `json:"scaleDownStep,omitempty"` ScaleUpCoolDownTime string `json:"scaleUpCoolDownTime,omitempty"` ScaleDownCoolDownTime string `json:"scaleDownCoolDownTime,omitempty"` }
To handle burst traffic, scale up in short time (this feature requires GPU context migration & replication, not available yet)
func (*AutoSetReplicas) DeepCopy ¶
func (in *AutoSetReplicas) DeepCopy() *AutoSetReplicas
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AutoSetReplicas.
func (*AutoSetReplicas) DeepCopyInto ¶
func (in *AutoSetReplicas) DeepCopyInto(out *AutoSetReplicas)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type AutoSetRequests ¶
type AutoSetRequests struct { Enable bool `json:"enable,omitempty"` // target resource to scale requests, such as "tflops", "vram", or "all" by default TargetResource string `json:"targetResource,omitempty"` PercentileForAutoRequests string `json:"percentileForAutoRequests,omitempty"` // the request buffer ratio, for example actual usage is 1.0, 10% buffer will be 1.1 as final preferred requests ExtraBufferRatio string `json:"extraBufferRatio,omitempty"` EvaluationPeriod string `json:"evaluationPeriod,omitempty"` AggregationPeriod string `json:"aggregationPeriod,omitempty"` Prediction SmartSchedulerModelInput `json:"prediction,omitempty"` }
func (*AutoSetRequests) DeepCopy ¶
func (in *AutoSetRequests) DeepCopy() *AutoSetRequests
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AutoSetRequests.
func (*AutoSetRequests) DeepCopyInto ¶
func (in *AutoSetRequests) DeepCopyInto(out *AutoSetRequests)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type BudgetExceedStrategy ¶
type BudgetExceedStrategy string
+kubebuilder:validation:Enum=AlertOnly;AlertAndTerminateVM
const ( BudgetExceedStrategyAlertOnly BudgetExceedStrategy = "AlertOnly" BudgetExceedStrategyAlertAndTerminateVM BudgetExceedStrategy = "AlertAndTerminateVM" )
type CapacityConfig ¶
type CapacityConfig struct { // +optional MinResources *GPUOrCPUResourceUnit `json:"minResources,omitempty"` // +optional MaxResources *GPUOrCPUResourceUnit `json:"maxResources,omitempty"` // +optional WarmResources *GPUOrCPUResourceUnit `json:"warmResources,omitempty"` // +optional Oversubscription *Oversubscription `json:"oversubscription,omitempty"` }
func (*CapacityConfig) DeepCopy ¶
func (in *CapacityConfig) DeepCopy() *CapacityConfig
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CapacityConfig.
func (*CapacityConfig) DeepCopyInto ¶
func (in *CapacityConfig) DeepCopyInto(out *CapacityConfig)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type ClientConfig ¶
type ClientConfig struct { OperatorEndpoint string `json:"operatorEndpoint,omitempty"` // +optional PatchToPod *runtime.RawExtension `json:"patchToPod,omitempty"` // +optional PatchToContainer *runtime.RawExtension `json:"patchToContainer,omitempty"` }
func (*ClientConfig) DeepCopy ¶
func (in *ClientConfig) DeepCopy() *ClientConfig
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClientConfig.
func (*ClientConfig) DeepCopyInto ¶
func (in *ClientConfig) DeepCopyInto(out *ClientConfig)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type ComponentConfig ¶
type ComponentConfig struct { // +optional Worker *WorkerConfig `json:"worker,omitempty"` // +optional Hypervisor *HypervisorConfig `json:"hypervisor,omitempty"` // +optional NodeDiscovery *NodeDiscoveryConfig `json:"nodeDiscovery,omitempty"` // +optional Client *ClientConfig `json:"client,omitempty"` }
Customize system components for seamless onboarding.
func (*ComponentConfig) DeepCopy ¶
func (in *ComponentConfig) DeepCopy() *ComponentConfig
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ComponentConfig.
func (*ComponentConfig) DeepCopyInto ¶
func (in *ComponentConfig) DeepCopyInto(out *ComponentConfig)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type ComputingVendorConfig ¶
type ComputingVendorConfig struct { Name string `json:"name,omitempty"` // support popular cloud providers Type ComputingVendorName `json:"type,omitempty"` AuthType AuthTypeEnum `json:"authType,omitempty"` // Authentication type (e.g., accessKey, serviceAccountRole). // +optional // +kubebuilder:default=true Enable *bool `json:"enable,omitempty"` // Enable or disable the computing vendor. Params ComputingVendorParams `json:"params,omitempty"` }
ComputingVendorConfig defines the Cloud vendor connection such as AWS, GCP, Azure etc.
func (*ComputingVendorConfig) DeepCopy ¶
func (in *ComputingVendorConfig) DeepCopy() *ComputingVendorConfig
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ComputingVendorConfig.
func (*ComputingVendorConfig) DeepCopyInto ¶
func (in *ComputingVendorConfig) DeepCopyInto(out *ComputingVendorConfig)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type ComputingVendorName ¶
type ComputingVendorName string
+kubebuilder:validation:Enum=aws;lambda-labs;gcp;azure;oracle-oci;ibm;openshift;vultr;together-ai;alibaba;nvidia;tencent;runpod;mock
const ( ComputingVendorAWS ComputingVendorName = "aws" ComputingVendorGCP ComputingVendorName = "gcp" ComputingVendorAzure ComputingVendorName = "azure" ComputingVendorOracle ComputingVendorName = "oracle-oci" ComputingVendorIBM ComputingVendorName = "ibm" ComputingVendorOpenShift ComputingVendorName = "openshift" ComputingVendorVultr ComputingVendorName = "vultr" ComputingVendorTogetherAI ComputingVendorName = "together-ai" ComputingVendorLambdaLabs ComputingVendorName = "lambda-labs" ComputingVendorAlibaba ComputingVendorName = "alibaba" ComputingVendorNvidia ComputingVendorName = "nvidia" ComputingVendorTencent ComputingVendorName = "tencent" ComputingVendorRunPod ComputingVendorName = "runpod" // This is for unit/integration testing only, no cloud provider is involved ComputingVendorMock ComputingVendorName = "mock" )
type ComputingVendorParams ¶
type ComputingVendorParams struct { // +optional DefaultRegion string `json:"defaultRegion,omitempty"` // Region for the computing vendor. // the secret of access key and secret key or config file, must be mounted as file path // +optional AccessKeyPath string `json:"accessKeyPath,omitempty"` // +optional SecretKeyPath string `json:"secretKeyPath,omitempty"` // preferred IAM role since it's more secure // +optional IAMRole string `json:"iamRole,omitempty"` // +optional ConfigFile string `json:"configFile,omitempty"` // +optional // User can set extra cloud vendor params, e.g. // in ali cloud: spotPriceLimit, spotDuration, spotInterruptionBehavior, systemDiskCategory, systemDiskSize, dataDiskPerformanceLevel // in aws cloud: TODO ExtraParams map[string]string `json:"extraParams,omitempty"` }
func (*ComputingVendorParams) DeepCopy ¶
func (in *ComputingVendorParams) DeepCopy() *ComputingVendorParams
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ComputingVendorParams.
func (*ComputingVendorParams) DeepCopyInto ¶
func (in *ComputingVendorParams) DeepCopyInto(out *ComputingVendorParams)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type DataPipeline4ResourcesConfig ¶
type DataPipeline4ResourcesConfig struct { // +optional SyncToCloud *bool `json:"syncToCloud,omitempty"` // Whether to sync resources to the cloud. // +optional human readable time like 1h, 1d, default to 1h SyncPeriod string `json:"syncPeriod,omitempty"` // Period for syncing resources. }
func (*DataPipeline4ResourcesConfig) DeepCopy ¶
func (in *DataPipeline4ResourcesConfig) DeepCopy() *DataPipeline4ResourcesConfig
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DataPipeline4ResourcesConfig.
func (*DataPipeline4ResourcesConfig) DeepCopyInto ¶
func (in *DataPipeline4ResourcesConfig) DeepCopyInto(out *DataPipeline4ResourcesConfig)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type DataPipeline4TimeSeriesConfig ¶
type DataPipeline4TimeSeriesConfig struct { AggregationPeriods []string `json:"aggregationPeriods,omitempty"` // List of aggregation periods. RawDataRetention string `json:"rawDataRetention,omitempty"` // Retention period for raw data. AggregationDataRetention string `json:"aggregationDataRetention,omitempty"` // Retention period for aggregated data. RemoteWrite RemoteWriteConfig `json:"remoteWrite,omitempty"` // Configuration for remote write. }
func (*DataPipeline4TimeSeriesConfig) DeepCopy ¶
func (in *DataPipeline4TimeSeriesConfig) DeepCopy() *DataPipeline4TimeSeriesConfig
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DataPipeline4TimeSeriesConfig.
func (*DataPipeline4TimeSeriesConfig) DeepCopyInto ¶
func (in *DataPipeline4TimeSeriesConfig) DeepCopyInto(out *DataPipeline4TimeSeriesConfig)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type DataPipelineResultRemoteWriteConfig ¶
type DataPipelineResultRemoteWriteConfig struct { Type string `json:"type,omitempty"` // Type of the connection (e.g., datadog). URL string `json:"url,omitempty"` // URL of the connection. }
func (*DataPipelineResultRemoteWriteConfig) DeepCopy ¶
func (in *DataPipelineResultRemoteWriteConfig) DeepCopy() *DataPipelineResultRemoteWriteConfig
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DataPipelineResultRemoteWriteConfig.
func (*DataPipelineResultRemoteWriteConfig) DeepCopyInto ¶
func (in *DataPipelineResultRemoteWriteConfig) DeepCopyInto(out *DataPipelineResultRemoteWriteConfig)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type DataPipelinesConfig ¶
type DataPipelinesConfig struct { Resources DataPipeline4ResourcesConfig `json:"resources,omitempty"` Timeseries DataPipeline4TimeSeriesConfig `json:"timeseries,omitempty"` }
DataPipelinesConfig defines the aggregation jobs that can make statistics on the data and then report to cloud if configured.
func (*DataPipelinesConfig) DeepCopy ¶
func (in *DataPipelinesConfig) DeepCopy() *DataPipelinesConfig
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DataPipelinesConfig.
func (*DataPipelinesConfig) DeepCopyInto ¶
func (in *DataPipelinesConfig) DeepCopyInto(out *DataPipelinesConfig)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type GPU ¶
type GPU struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata,omitempty"` Status GPUStatus `json:"status,omitempty"` }
+kubebuilder:object:root=true +kubebuilder:subresource:status +kubebuilder:resource:scope=Cluster +kubebuilder:printcolumn:name="GPU Model",type="string",JSONPath=".spec.gpuModel" +kubebuilder:printcolumn:name="Phase",type="string",JSONPath=".status.phase" +kubebuilder:printcolumn:name="Total TFlops",type="string",JSONPath=".status.capacity.tflops" +kubebuilder:printcolumn:name="Total VRAM",type="string",JSONPath=".status.capacity.vram" +kubebuilder:printcolumn:name="Available TFlops",type="string",JSONPath=".status.available.tflops" +kubebuilder:printcolumn:name="Available VRAM",type="string",JSONPath=".status.available.vram" +kubebuilder:printcolumn:name="Device UUID",type="string",JSONPath=".status.uuid" GPU is the Schema for the gpus API.
func (*GPU) DeepCopy ¶
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPU.
func (*GPU) DeepCopyInto ¶
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*GPU) DeepCopyObject ¶
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type GPUFilter ¶
type GPUFilter struct { Type string `json:"type,omitempty"` Params runtime.RawExtension `json:"params,omitempty"` }
GPUFilter is to select eligible GPUs for scheduling.
example:
```yaml
- type: avoidTooMuchConnectionsOnSameGPU
  params:
    connectionNum: 150
- type: avoidDifferentZone
  params:
    # by default, GPU worker will be scheduled into the same zone as CPU Client Pod to align AZ and improve performance
    topologyKey: topology.kubernetes.io/zone
```
func (*GPUFilter) DeepCopy ¶
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUFilter.
func (*GPUFilter) DeepCopyInto ¶
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type GPUList ¶
type GPUList struct { metav1.TypeMeta `json:",inline"` metav1.ListMeta `json:"metadata,omitempty"` Items []GPU `json:"items"` }
GPUList contains a list of GPU.
func (*GPUList) DeepCopy ¶
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUList.
func (*GPUList) DeepCopyInto ¶
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*GPUList) DeepCopyObject ¶
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type GPUNode ¶
type GPUNode struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata,omitempty"` Spec GPUNodeSpec `json:"spec,omitempty"` Status GPUNodeStatus `json:"status,omitempty"` }
+kubebuilder:object:root=true +kubebuilder:subresource:status +kubebuilder:resource:scope=Cluster +kubebuilder:printcolumn:name="Phase",type="string",JSONPath=".status.phase" +kubebuilder:printcolumn:name="Total TFlops",type="string",JSONPath=".status.totalTFlops" +kubebuilder:printcolumn:name="Total VRAM",type="string",JSONPath=".status.totalVRAM" +kubebuilder:printcolumn:name="Virtual TFlops",type="string",JSONPath=".status.virtualTFlops" +kubebuilder:printcolumn:name="Virtual VRAM",type="string",JSONPath=".status.virtualVRAM" +kubebuilder:printcolumn:name="Available TFlops",type="string",JSONPath=".status.availableTFlops" +kubebuilder:printcolumn:name="Available VRAM",type="string",JSONPath=".status.availableVRAM" +kubebuilder:printcolumn:name="GPU Count",type="integer",JSONPath=".status.totalGPUs" GPUNode is the Schema for the gpunodes API.
func (*GPUNode) DeepCopy ¶
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUNode.
func (*GPUNode) DeepCopyInto ¶
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*GPUNode) DeepCopyObject ¶
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
func (*GPUNode) InitializeStatus ¶
func (*GPUNode) SetAnnotationToTriggerNodeSync ¶
func (node *GPUNode) SetAnnotationToTriggerNodeSync()
type GPUNodeAllocationDetails ¶
type GPUNodeAllocationDetails struct { PodID string `json:"podID,omitempty"` PodName string `json:"podName,omitempty"` Namespace string `json:"namespace"` WorkloadName string `json:"workload,omitempty"` Requests GPUResourceUnit `json:"requests"` Limits GPUResourceUnit `json:"limits"` QoS QoSLevel `json:"qos,omitempty"` }
func (*GPUNodeAllocationDetails) DeepCopy ¶
func (in *GPUNodeAllocationDetails) DeepCopy() *GPUNodeAllocationDetails
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUNodeAllocationDetails.
func (*GPUNodeAllocationDetails) DeepCopyInto ¶
func (in *GPUNodeAllocationDetails) DeepCopyInto(out *GPUNodeAllocationDetails)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type GPUNodeClass ¶
type GPUNodeClass struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata,omitempty"` Spec GPUNodeClassSpec `json:"spec,omitempty"` Status GPUNodeClassStatus `json:"status,omitempty"` }
+kubebuilder:object:root=true +kubebuilder:subresource:status +kubebuilder:resource:scope=Cluster GPUNodeClass is the Schema for the gpunodeclasses API.
func (*GPUNodeClass) DeepCopy ¶
func (in *GPUNodeClass) DeepCopy() *GPUNodeClass
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUNodeClass.
func (*GPUNodeClass) DeepCopyInto ¶
func (in *GPUNodeClass) DeepCopyInto(out *GPUNodeClass)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*GPUNodeClass) DeepCopyObject ¶
func (in *GPUNodeClass) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type GPUNodeClassList ¶
type GPUNodeClassList struct { metav1.TypeMeta `json:",inline"` metav1.ListMeta `json:"metadata,omitempty"` Items []GPUNodeClass `json:"items"` }
GPUNodeClassList contains a list of GPUNodeClass.
func (*GPUNodeClassList) DeepCopy ¶
func (in *GPUNodeClassList) DeepCopy() *GPUNodeClassList
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUNodeClassList.
func (*GPUNodeClassList) DeepCopyInto ¶
func (in *GPUNodeClassList) DeepCopyInto(out *GPUNodeClassList)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*GPUNodeClassList) DeepCopyObject ¶
func (in *GPUNodeClassList) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type GPUNodeClassSpec ¶
type GPUNodeClassSpec struct { // +optional // The launch template to use for VM instances, if set, all other fields could be skipped LaunchTemplate NodeClassItemSelectorTerms `json:"launchTemplate"` // +optional // Could be private or public, varies in different cloud vendor, define where to query the OSImageID // +kubebuilder:default="Private" OSImageType OSImageTypeEnum `json:"osImageType,omitempty"` // the OS image identifier string, default to use first one, if not found, fallback to others OSImageSelectorTerms []NodeClassItemSelectorTerms `json:"osImageSelectorTerms,omitempty"` // +optional // The instance profile to use, assign IAM role and permissions for EC2 instances InstanceProfile string `json:"instanceProfile,omitempty"` // +optional // for AWS only, IMDSv2 metadata service options MetadataOptions *NodeClassMetadataOptions `json:"metadataOptions,omitempty"` // +optional SecurityGroupSelectorTerms []NodeClassItemSelectorTerms `json:"securityGroupSelectorTerms,omitempty"` // +optional SubnetSelectorTerms []NodeClassItemSelectorTerms `json:"subnetSelectorTerms,omitempty"` // Terms to select subnets // +optional BlockDeviceMappings []NodeClassBlockDeviceMappings `json:"blockDeviceMappings,omitempty"` // Block device mappings for the instance // +optional Tags map[string]string `json:"tags,omitempty"` // Tags associated with the resource // +optional UserData string `json:"userData,omitempty"` // User data script for the instance }
GPUNodeClassSpec defines the desired state of GPUNodeClass.
func (*GPUNodeClassSpec) DeepCopy ¶
func (in *GPUNodeClassSpec) DeepCopy() *GPUNodeClassSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUNodeClassSpec.
func (*GPUNodeClassSpec) DeepCopyInto ¶
func (in *GPUNodeClassSpec) DeepCopyInto(out *GPUNodeClassSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type GPUNodeClassStatus ¶
type GPUNodeClassStatus struct { }
GPUNodeClassStatus defines the observed state of GPUNodeClass.
func (*GPUNodeClassStatus) DeepCopy ¶
func (in *GPUNodeClassStatus) DeepCopy() *GPUNodeClassStatus
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUNodeClassStatus.
func (*GPUNodeClassStatus) DeepCopyInto ¶
func (in *GPUNodeClassStatus) DeepCopyInto(out *GPUNodeClassStatus)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type GPUNodeInfo ¶
type GPUNodeInfo struct { // +optional // only set when node is managed by TensorFusion InstanceID string `json:"instanceID,omitempty"` Region string `json:"region,omitempty"` Hostname string `json:"hostname,omitempty"` IP string `json:"ip,omitempty"` KernelVersion string `json:"kernelVersion,omitempty"` OSImage string `json:"osImage,omitempty"` GPUDriverVersion string `json:"gpuDriverVersion,omitempty"` GPUModel string `json:"gpuModel,omitempty"` GPUCount int32 `json:"gpuCount,omitempty"` OperatingSystem string `json:"operatingSystem,omitempty"` Architecture string `json:"architecture,omitempty"` // Additional space for L1/L2 VRAM buffer RAMSize resource.Quantity `json:"ramSize,omitempty"` DataDiskSize resource.Quantity `json:"dataDiskSize,omitempty"` }
func (*GPUNodeInfo) DeepCopy ¶
func (in *GPUNodeInfo) DeepCopy() *GPUNodeInfo
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUNodeInfo.
func (*GPUNodeInfo) DeepCopyInto ¶
func (in *GPUNodeInfo) DeepCopyInto(out *GPUNodeInfo)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type GPUNodeList ¶
type GPUNodeList struct { metav1.TypeMeta `json:",inline"` metav1.ListMeta `json:"metadata,omitempty"` Items []GPUNode `json:"items"` }
GPUNodeList contains a list of GPUNode.
func (*GPUNodeList) DeepCopy ¶
func (in *GPUNodeList) DeepCopy() *GPUNodeList
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUNodeList.
func (*GPUNodeList) DeepCopyInto ¶
func (in *GPUNodeList) DeepCopyInto(out *GPUNodeList)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*GPUNodeList) DeepCopyObject ¶
func (in *GPUNodeList) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type GPUNodeManageMode ¶
// GPUNodeManageMode is the string enum used by GPUNodeSpec.ManageMode; the
// declared values are Manual, AutoSelect and Provisioned.
type GPUNodeManageMode string
+kubebuilder:validation:Enum=Manual;AutoSelect;Provisioned
// Values of GPUNodeManageMode.
const (
	GPUNodeManageModeManual      GPUNodeManageMode = "Manual"
	GPUNodeManageModeAutoSelect  GPUNodeManageMode = "AutoSelect"
	GPUNodeManageModeProvisioned GPUNodeManageMode = "Provisioned"
)
type GPUNodeSpec ¶
type GPUNodeSpec struct {
	// +kubebuilder:default=AutoSelect
	ManageMode GPUNodeManageMode `json:"manageMode,omitempty"`

	// +optional
	CostPerHour string `json:"costPerHour,omitempty"`

	// If not all GPU cards should be used, specify the GPU card indices.
	// Defaults to empty, which onboards all GPU cards to the pool.
	// +optional
	GPUCardIndices []int `json:"gpuCardIndices,omitempty"`

	// +optional
	CloudVendorParam string `json:"cloudVendorParam,omitempty"`
}
GPUNodeSpec defines the desired state of GPUNode.
func (*GPUNodeSpec) DeepCopy ¶
func (in *GPUNodeSpec) DeepCopy() *GPUNodeSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUNodeSpec.
func (*GPUNodeSpec) DeepCopyInto ¶
func (in *GPUNodeSpec) DeepCopyInto(out *GPUNodeSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type GPUNodeStatus ¶
type GPUNodeStatus struct {
	// The identifier of the Kubernetes node. In nodeSelector mode the GPUNode
	// name is the same as the Kubernetes node name because the GPUNode is
	// owned by the Kubernetes node; in node provisioning mode the Kubernetes
	// node is owned by the GPUNode and the K8S node name is uncontrollable.
	KubernetesNodeName string `json:"kubernetesNodeName"`

	// +kubebuilder:default=Pending
	Phase TensorFusionGPUNodePhase `json:"phase"`

	// +optional
	Conditions []metav1.Condition `json:"conditions,omitempty"`

	TotalTFlops     resource.Quantity `json:"totalTFlops"`
	TotalVRAM       resource.Quantity `json:"totalVRAM"`
	VirtualTFlops   resource.Quantity `json:"virtualTFlops"`
	VirtualVRAM     resource.Quantity `json:"virtualVRAM"`
	AvailableTFlops resource.Quantity `json:"availableTFlops"`
	AvailableVRAM   resource.Quantity `json:"availableVRAM"`

	// +optional
	VirtualAvailableTFlops *resource.Quantity `json:"virtualAvailableTFlops,omitempty"`

	// +optional
	VirtualAvailableVRAM *resource.Quantity `json:"virtualAvailableVRAM,omitempty"`

	// +optional
	HypervisorStatus NodeHypervisorStatus `json:"hypervisorStatus,omitempty"`

	// +optional
	NodeInfo GPUNodeInfo `json:"nodeInfo,omitempty"`

	// +optional
	LoadedModels *[]string `json:"loadedModels,omitempty"`

	TotalGPUs   int32 `json:"totalGPUs"`
	ManagedGPUs int32 `json:"managedGPUs"`

	// +optional
	ManagedGPUDeviceIDs []string `json:"managedGPUDeviceIDs,omitempty"`

	ObservedGeneration int64 `json:"observedGeneration,omitempty"`

	// Allocation details are used for node compaction and for calculating
	// which apps are using the node.
	// +optional
	AllocationDetails *[]GPUNodeAllocationDetails `json:"allocationDetails,omitempty"`
}
GPUNodeStatus defines the observed state of GPUNode.
func (*GPUNodeStatus) DeepCopy ¶
func (in *GPUNodeStatus) DeepCopy() *GPUNodeStatus
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUNodeStatus.
func (*GPUNodeStatus) DeepCopyInto ¶
func (in *GPUNodeStatus) DeepCopyInto(out *GPUNodeStatus)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type GPUOrCPUResourceUnit ¶
// GPUOrCPUResourceUnit is a set of resource quantities: GPU TFlops and VRAM,
// plus optional CPU and memory.
type GPUOrCPUResourceUnit struct {
	TFlops resource.Quantity `json:"tflops,omitempty"`
	VRAM   resource.Quantity `json:"vram,omitempty"`

	// CPU and Memory are only available when the CloudVendor connection is enabled.
	// +optional
	CPU resource.Quantity `json:"cpu,omitempty"`

	// +optional
	Memory resource.Quantity `json:"memory,omitempty"`
}
func (*GPUOrCPUResourceUnit) DeepCopy ¶
func (in *GPUOrCPUResourceUnit) DeepCopy() *GPUOrCPUResourceUnit
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUOrCPUResourceUnit.
func (*GPUOrCPUResourceUnit) DeepCopyInto ¶
func (in *GPUOrCPUResourceUnit) DeepCopyInto(out *GPUOrCPUResourceUnit)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type GPUPool ¶
// GPUPool is the API object that pairs a GPUPoolSpec with a GPUPoolStatus,
// alongside the standard type and object metadata.
type GPUPool struct {
	metav1.TypeMeta   `json:",inline"`
	metav1.ObjectMeta `json:"metadata,omitempty"`

	Spec   GPUPoolSpec   `json:"spec,omitempty"`
	Status GPUPoolStatus `json:"status,omitempty"`
}
+kubebuilder:printcolumn:name="Phase",type="string",JSONPath=".status.phase" +kubebuilder:printcolumn:name="TFlops Oversubscription",type="string",JSONPath=".spec.capacityConfig.oversubscription.tflopsOversellRatio" +kubebuilder:printcolumn:name="Mode",type="string",JSONPath=".status.mode" +kubebuilder:printcolumn:name="Default Scheduling Strategy",type="string",JSONPath=".spec.schedulingConfigTemplate" +kubebuilder:printcolumn:name="Total Nodes",type="string",JSONPath=".status.totalNodes" +kubebuilder:printcolumn:name="Total GPU",type="string",JSONPath=".status.totalGPUs" +kubebuilder:printcolumn:name="Total Tflops",type="string",JSONPath=".status.totalTFlops" +kubebuilder:printcolumn:name="Total VRAM",type="string",JSONPath=".status.totalVRAM" +kubebuilder:printcolumn:name="Available Tflops",type="string",JSONPath=".status.availableTFlops" +kubebuilder:printcolumn:name="Available VRAM",type="string",JSONPath=".status.availableVRAM"
func (*GPUPool) DeepCopy ¶
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUPool.
func (*GPUPool) DeepCopyInto ¶
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*GPUPool) DeepCopyObject ¶
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type GPUPoolDefinition ¶
type GPUPoolDefinition struct {
	Name string `json:"name,omitempty"` // Name of the GPU pool.

	// SpecTemplate is the GPUPoolSpec carried by this definition.
	SpecTemplate GPUPoolSpec `json:"specTemplate"`
}
GPUPoolDefinition defines how to create a GPU pool; the definition could be a URL or inline.
func (*GPUPoolDefinition) DeepCopy ¶
func (in *GPUPoolDefinition) DeepCopy() *GPUPoolDefinition
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUPoolDefinition.
func (*GPUPoolDefinition) DeepCopyInto ¶
func (in *GPUPoolDefinition) DeepCopyInto(out *GPUPoolDefinition)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type GPUPoolList ¶
type GPUPoolList struct {
	metav1.TypeMeta `json:",inline"`
	metav1.ListMeta `json:"metadata,omitempty"`

	// Items holds the GPUPool objects contained in the list.
	Items []GPUPool `json:"items"`
}
GPUPoolList contains a list of GPUPool.
func (*GPUPoolList) DeepCopy ¶
func (in *GPUPoolList) DeepCopy() *GPUPoolList
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUPoolList.
func (*GPUPoolList) DeepCopyInto ¶
func (in *GPUPoolList) DeepCopyInto(out *GPUPoolList)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*GPUPoolList) DeepCopyObject ¶
func (in *GPUPoolList) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type GPUPoolSpec ¶
type GPUPoolSpec struct {
	// CapacityConfig and NodeManagerConfig carry no +optional marker, unlike
	// the remaining fields of this spec.
	CapacityConfig    *CapacityConfig    `json:"capacityConfig,omitempty"`
	NodeManagerConfig *NodeManagerConfig `json:"nodeManagerConfig,omitempty"`

	// +optional
	ObservabilityConfig *ObservabilityConfig `json:"observabilityConfig,omitempty"`

	// +optional
	QosConfig *QosConfig `json:"qosConfig,omitempty"`

	// +optional
	ComponentConfig *ComponentConfig `json:"componentConfig,omitempty"`

	// +optional
	SchedulingConfigTemplate *string `json:"schedulingConfigTemplate,omitempty"`
}
GPUPoolSpec defines the desired state of GPUPool.
func (*GPUPoolSpec) DeepCopy ¶
func (in *GPUPoolSpec) DeepCopy() *GPUPoolSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUPoolSpec.
func (*GPUPoolSpec) DeepCopyInto ¶
func (in *GPUPoolSpec) DeepCopyInto(out *GPUPoolSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type GPUPoolStatus ¶
type GPUPoolStatus struct {
	Cluster string `json:"cluster,omitempty"`

	// +kubebuilder:default=Pending
	Phase TensorFusionPoolPhase `json:"phase"`

	Conditions []metav1.Condition `json:"conditions,omitempty"`

	TotalNodes    int32 `json:"totalNodes,omitempty"`
	TotalGPUs     int32 `json:"totalGPUs,omitempty"`
	ReadyNodes    int32 `json:"readyNodes"`
	NotReadyNodes int32 `json:"notReadyNodes"`

	TotalTFlops     resource.Quantity `json:"totalTFlops"`
	TotalVRAM       resource.Quantity `json:"totalVRAM"`
	VirtualTFlops   resource.Quantity `json:"virtualTFlops"`
	VirtualVRAM     resource.Quantity `json:"virtualVRAM"`
	AvailableTFlops resource.Quantity `json:"availableTFlops"`
	AvailableVRAM   resource.Quantity `json:"availableVRAM"`

	// +optional
	VirtualAvailableTFlops *resource.Quantity `json:"virtualAvailableTFlops,omitempty"`

	// +optional
	VirtualAvailableVRAM *resource.Quantity `json:"virtualAvailableVRAM,omitempty"`

	// When any component version or config is updated, the pool controller
	// performs a rolling update. The status is updated periodically
	// (defaulting to every 5s) with a progress value from 0 to 100; at 100
	// the component version or config is fully updated.
	ComponentStatus PoolComponentStatus `json:"componentStatus"`

	// TODO: calculated every 1h/1d/1w average
	UtilizedTFlopsPercent string `json:"utilizedTFlopsPercent,omitempty"`
	UtilizedVRAMPercent   string `json:"utilizedVRAMPercent,omitempty"`

	// TODO: updated with interval
	AllocatedTFlopsPercent string `json:"allocatedTFlopsPercent,omitempty"`
	AllocatedVRAMPercent   string `json:"allocatedVRAMPercent,omitempty"`

	// TODO: aggregated with interval
	SavedCostsPerMonth       string `json:"savedCostsPerMonth,omitempty"`
	PotentialSavingsPerMonth string `json:"potentialSavingsPerMonth,omitempty"`

	// +kubebuilder:default=""
	// If the budget is exceeded, this is set to a comma-separated string
	// indicating which period(s) caused the budget to be exceeded.
	// If this field is not empty, the scheduler will not schedule new AI
	// workloads and will stop the scaling-up check.
	// TODO not implemented yet
	BudgetExceeded string `json:"budgetExceeded,omitempty"`

	// +optional
	LastCompactionTime *metav1.Time `json:"lastCompactionTime,omitempty"`
}
GPUPoolStatus defines the observed state of GPUPool.
func (*GPUPoolStatus) DeepCopy ¶
func (in *GPUPoolStatus) DeepCopy() *GPUPoolStatus
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUPoolStatus.
func (*GPUPoolStatus) DeepCopyInto ¶
func (in *GPUPoolStatus) DeepCopyInto(out *GPUPoolStatus)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type GPUResourcePricingUnit ¶
type GPUResourcePricingUnit struct {
	// Price per FP16 TFlops per hour, as a string.
	// +kubebuilder:default="$0.0069228"
	PerFP16TFlopsPerHour string `json:"perFP16TFlopsPerHour,omitempty"`

	// Price per GB of VRAM per hour, as a string.
	// +kubebuilder:default="$0.01548"
	PerGBOfVRAMPerHour string `json:"perGBOfVRAMPerHour,omitempty"`
}
The default pricing is based on second-level pricing from https://modal.com/pricing, with Tensor/CUDA Core : HBM = 2:1.
func (*GPUResourcePricingUnit) DeepCopy ¶
func (in *GPUResourcePricingUnit) DeepCopy() *GPUResourcePricingUnit
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUResourcePricingUnit.
func (*GPUResourcePricingUnit) DeepCopyInto ¶
func (in *GPUResourcePricingUnit) DeepCopyInto(out *GPUResourcePricingUnit)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type GPUResourceUnit ¶
// GPUResourceUnit is a pair of GPU resource quantities: TFlops and VRAM.
type GPUResourceUnit struct {
	// Tera floating point operations per second.
	TFlops resource.Quantity `json:"tflops,omitempty"`

	// VRAM is short for video memory, namely GPU RAM.
	VRAM resource.Quantity `json:"vram,omitempty"`
}
func (*GPUResourceUnit) DeepCopy ¶
func (in *GPUResourceUnit) DeepCopy() *GPUResourceUnit
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUResourceUnit.
func (*GPUResourceUnit) DeepCopyInto ¶
func (in *GPUResourceUnit) DeepCopyInto(out *GPUResourceUnit)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type GPUStatus ¶
type GPUStatus struct {
	// +kubebuilder:default=Pending
	Phase TensorFusionGPUPhase `json:"phase"`

	Capacity  *Resource `json:"capacity"`
	Available *Resource `json:"available"`

	UUID string `json:"uuid"`

	// The host match selector used to schedule worker pods.
	NodeSelector map[string]string `json:"nodeSelector"`

	GPUModel string `json:"gpuModel"`
	Message  string `json:"message"`
}
GPUStatus defines the observed state of GPU.
func (*GPUStatus) DeepCopy ¶
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUStatus.
func (*GPUStatus) DeepCopyInto ¶
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type HypervisorConfig ¶
// HypervisorConfig holds the hypervisor component configuration; its only
// field is an optional raw pod template.
type HypervisorConfig struct {
	// +optional
	PodTemplate *runtime.RawExtension `json:"podTemplate,omitempty"`
}
func (*HypervisorConfig) DeepCopy ¶
func (in *HypervisorConfig) DeepCopy() *HypervisorConfig
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HypervisorConfig.
func (*HypervisorConfig) DeepCopyInto ¶
func (in *HypervisorConfig) DeepCopyInto(out *HypervisorConfig)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type HypervisorScheduling ¶
// HypervisorScheduling groups hypervisor scheduling settings; its only field
// is the multi-process queuing configuration.
type HypervisorScheduling struct {
	MultiProcessQueuing MultiProcessQueuing `json:"multiProcessQueuing,omitempty"`
}
func (*HypervisorScheduling) DeepCopy ¶
func (in *HypervisorScheduling) DeepCopy() *HypervisorScheduling
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HypervisorScheduling.
func (*HypervisorScheduling) DeepCopyInto ¶
func (in *HypervisorScheduling) DeepCopyInto(out *HypervisorScheduling)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type MaintenanceWindow ¶
// MaintenanceWindow lists the included maintenance windows.
type MaintenanceWindow struct {
	// Each entry uses crontab syntax.
	Includes []string `json:"includes,omitempty"`
}
func (*MaintenanceWindow) DeepCopy ¶
func (in *MaintenanceWindow) DeepCopy() *MaintenanceWindow
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MaintenanceWindow.
func (*MaintenanceWindow) DeepCopyInto ¶
func (in *MaintenanceWindow) DeepCopyInto(out *MaintenanceWindow)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type MonitorConfig ¶
// MonitorConfig holds the monitoring configuration; its only field is an
// interval expressed as a string.
type MonitorConfig struct {
	Interval string `json:"interval,omitempty"`
}
func (*MonitorConfig) DeepCopy ¶
func (in *MonitorConfig) DeepCopy() *MonitorConfig
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MonitorConfig.
func (*MonitorConfig) DeepCopyInto ¶
func (in *MonitorConfig) DeepCopyInto(out *MonitorConfig)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type MultiProcessQueuing ¶
// MultiProcessQueuing holds the multi-process queuing settings: an optional
// enable flag, an interval string and a list of per-queue-level time slices.
type MultiProcessQueuing struct {
	// +optional
	Enable *bool `json:"enable,omitempty"`

	Interval             string   `json:"interval,omitempty"`
	QueueLevelTimeSlices []string `json:"queueLevelTimeSlices,omitempty"`
}
func (*MultiProcessQueuing) DeepCopy ¶
func (in *MultiProcessQueuing) DeepCopy() *MultiProcessQueuing
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MultiProcessQueuing.
func (*MultiProcessQueuing) DeepCopyInto ¶
func (in *MultiProcessQueuing) DeepCopyInto(out *MultiProcessQueuing)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type NameNamespace ¶
// NameNamespace is a name plus namespace pair.
type NameNamespace struct {
	Name      string `json:"name,omitempty"`
	Namespace string `json:"namespace,omitempty"`
}
func (*NameNamespace) DeepCopy ¶
func (in *NameNamespace) DeepCopy() *NameNamespace
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NameNamespace.
func (*NameNamespace) DeepCopyInto ¶
func (in *NameNamespace) DeepCopyInto(out *NameNamespace)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type NodeClassBlockDeviceMappings ¶
// NodeClassBlockDeviceMappings maps a device name to its block device (EBS)
// settings.
type NodeClassBlockDeviceMappings struct {
	// +optional
	DeviceName string `json:"deviceName,omitempty"` // The device name for the block device

	// EBS holds the volume settings for this device.
	EBS NodeClassBlockDeviceSettings `json:"ebs,omitempty"`
}
func (*NodeClassBlockDeviceMappings) DeepCopy ¶
func (in *NodeClassBlockDeviceMappings) DeepCopy() *NodeClassBlockDeviceMappings
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeClassBlockDeviceMappings.
func (*NodeClassBlockDeviceMappings) DeepCopyInto ¶
func (in *NodeClassBlockDeviceMappings) DeepCopyInto(out *NodeClassBlockDeviceMappings)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type NodeClassBlockDeviceSettings ¶
// NodeClassBlockDeviceSettings describes a block device volume: its size,
// type, and whether it is deleted on termination and encrypted.
//
// NOTE(review): DeleteOnTermination and Encrypted are plain bool fields with
// omitempty and a default of true, so an explicit false is omitted when
// marshalled and may be defaulted back to true — confirm whether *bool is
// needed.
type NodeClassBlockDeviceSettings struct {
	VolumeSize string `json:"volumeSize,omitempty"`

	// +optional
	// The default value varies based on the cloud vendor.
	// For AWS it's gp3, for Alicloud it's cloud_essd.
	VolumeType string `json:"volumeType,omitempty"`

	// +optional
	// +kubebuilder:default=true
	DeleteOnTermination bool `json:"deleteOnTermination,omitempty"` // Whether to delete the EBS volume on termination

	// +optional
	// +kubebuilder:default=true
	Encrypted bool `json:"encrypted,omitempty"` // Whether the EBS volume is encrypted
}
func (*NodeClassBlockDeviceSettings) DeepCopy ¶
func (in *NodeClassBlockDeviceSettings) DeepCopy() *NodeClassBlockDeviceSettings
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeClassBlockDeviceSettings.
func (*NodeClassBlockDeviceSettings) DeepCopyInto ¶
func (in *NodeClassBlockDeviceSettings) DeepCopyInto(out *NodeClassBlockDeviceSettings)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type NodeClassItemSelectorTerms ¶
// NodeClassItemSelectorTerms selects an item by ID, by name, or by tags; all
// three fields are optional.
type NodeClassItemSelectorTerms struct {
	// +optional
	// The item ID.
	ID string `json:"id,omitempty"`

	// +optional
	// The item name.
	Name string `json:"name,omitempty"`

	// +optional
	// Query by tags.
	Tags map[string]string `json:"tags,omitempty"`
}
func (*NodeClassItemSelectorTerms) DeepCopy ¶
func (in *NodeClassItemSelectorTerms) DeepCopy() *NodeClassItemSelectorTerms
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeClassItemSelectorTerms.
func (*NodeClassItemSelectorTerms) DeepCopyInto ¶
func (in *NodeClassItemSelectorTerms) DeepCopyInto(out *NodeClassItemSelectorTerms)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type NodeClassMetadataOptions ¶
type NodeClassMetadataOptions struct {
	// NOTE(review): bool with omitempty and default=true; an explicit false
	// is omitted when marshalled and may be defaulted back to true — confirm.
	// +optional
	// +kubebuilder:default=true
	HttpEndpoint bool `json:"httpEndpoint,omitempty"`

	// +optional
	// +kubebuilder:default=false
	HttpProtocolIPv6 bool `json:"httpProtocolIPv6,omitempty"`

	// +optional
	// +kubebuilder:default=1
	HttpPutResponseHopLimit int `json:"httpPutResponseHopLimit,omitempty"`

	// +optional
	// +kubebuilder:default="required"
	HttpTokens string `json:"httpTokens,omitempty"`
}
NodeClassMetadataOptions holds the AWS IMDSv2 metadata service options.
func (*NodeClassMetadataOptions) DeepCopy ¶
func (in *NodeClassMetadataOptions) DeepCopy() *NodeClassMetadataOptions
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeClassMetadataOptions.
func (*NodeClassMetadataOptions) DeepCopyInto ¶
func (in *NodeClassMetadataOptions) DeepCopyInto(out *NodeClassMetadataOptions)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type NodeCompaction ¶
// NodeCompaction holds the node compaction settings; its only field is the
// period, a string defaulting to "5m".
type NodeCompaction struct {
	// +kubebuilder:default="5m"
	Period string `json:"period,omitempty"`
}
func (*NodeCompaction) DeepCopy ¶
func (in *NodeCompaction) DeepCopy() *NodeCompaction
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeCompaction.
func (*NodeCompaction) DeepCopyInto ¶
func (in *NodeCompaction) DeepCopyInto(out *NodeCompaction)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type NodeDiscoveryConfig ¶
// NodeDiscoveryConfig holds the node discovery component configuration; its
// only field is an optional raw pod template.
type NodeDiscoveryConfig struct {
	// +optional
	PodTemplate *runtime.RawExtension `json:"podTemplate,omitempty"`
}
func (*NodeDiscoveryConfig) DeepCopy ¶
func (in *NodeDiscoveryConfig) DeepCopy() *NodeDiscoveryConfig
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeDiscoveryConfig.
func (*NodeDiscoveryConfig) DeepCopyInto ¶
func (in *NodeDiscoveryConfig) DeepCopyInto(out *NodeDiscoveryConfig)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type NodeHypervisorStatus ¶
// NodeHypervisorStatus records a node's hypervisor state, hypervisor version
// and last heartbeat time.
type NodeHypervisorStatus struct {
	HypervisorState   string      `json:"hypervisorState,omitempty"`
	HypervisorVersion string      `json:"hypervisorVersion,omitempty"`
	LastHeartbeatTime metav1.Time `json:"lastHeartbeatTime,omitempty"`
}
func (*NodeHypervisorStatus) DeepCopy ¶
func (in *NodeHypervisorStatus) DeepCopy() *NodeHypervisorStatus
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeHypervisorStatus.
func (*NodeHypervisorStatus) DeepCopyInto ¶
func (in *NodeHypervisorStatus) DeepCopyInto(out *NodeHypervisorStatus)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type NodeManagerConfig ¶
// NodeManagerConfig holds the node management settings of a pool: the
// provisioning mode plus optional provisioner, node selector, compaction and
// rolling-update policy settings.
type NodeManagerConfig struct {
	// +kubebuilder:default="AutoSelect"
	ProvisioningMode ProvisioningMode `json:"provisioningMode,omitempty"`

	// +optional
	NodeProvisioner *NodeProvisioner `json:"nodeProvisioner,omitempty"`

	// +optional
	NodeSelector *corev1.NodeSelector `json:"nodeSelector,omitempty"`

	// +optional
	NodeCompaction *NodeCompaction `json:"nodeCompaction,omitempty"`

	// +optional
	NodePoolRollingUpdatePolicy *NodeRollingUpdatePolicy `json:"nodePoolRollingUpdatePolicy,omitempty"`
}
func (*NodeManagerConfig) DeepCopy ¶
func (in *NodeManagerConfig) DeepCopy() *NodeManagerConfig
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeManagerConfig.
func (*NodeManagerConfig) DeepCopyInto ¶
func (in *NodeManagerConfig) DeepCopyInto(out *NodeManagerConfig)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type NodeProvisioner ¶
type NodeProvisioner struct {
	// Mode can be Karpenter or Native. In Karpenter mode, the node
	// provisioner starts dummy nodes to provision and warm up GPU nodes and
	// does nothing for CPU nodes. In Native mode, the provisioner creates or
	// compacts GPU and CPU nodes based on the current pods.
	// +kubebuilder:default=Native
	Mode NodeProvisionerMode `json:"mode,omitempty"`

	NodeClass string `json:"nodeClass,omitempty"`

	// +optional
	GPURequirements []Requirement `json:"gpuRequirements,omitempty"`

	// +optional
	GPUTaints []Taint `json:"gpuTaints,omitempty"`

	// +optional
	GPULabels map[string]string `json:"gpuNodeLabels,omitempty"`

	// +optional
	CPURequirements []Requirement `json:"cpuRequirements,omitempty"`

	// +optional
	CPUTaints []Taint `json:"cpuTaints,omitempty"`

	// +optional
	CPULabels map[string]string `json:"cpuNodeLabels,omitempty"`

	// +optional
	// NodeProvisioner starts a virtual billing based on public pricing or
	// customized pricing. If the VM's costs exceed any budget constraint,
	// the new VM will not be created and alerts will be generated.
	Budget *PeriodicalBudget `json:"budget,omitempty"`
}
NodeProvisioner and NodeSelector are mutually exclusive. NodeSelector is for existing GPUs; NodeProvisioner is for Karpenter-like auto management.
func (*NodeProvisioner) DeepCopy ¶
func (in *NodeProvisioner) DeepCopy() *NodeProvisioner
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeProvisioner.
func (*NodeProvisioner) DeepCopyInto ¶
func (in *NodeProvisioner) DeepCopyInto(out *NodeProvisioner)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type NodeProvisionerMode ¶
// NodeProvisionerMode is the string enum used by NodeProvisioner.Mode; the
// declared values are Native and Karpenter.
type NodeProvisionerMode string
+kubebuilder:validation:Enum=Native;Karpenter
// Values of NodeProvisionerMode.
const (
	NodeProvisionerModeNative    NodeProvisionerMode = "Native"
	NodeProvisionerModeKarpenter NodeProvisionerMode = "Karpenter"
)
type NodeRequirementKey ¶
// NodeRequirementKey is the string enum of label keys that a node
// requirement may reference.
type NodeRequirementKey string
+kubebuilder:validation:Enum=node.kubernetes.io/instance-type;kubernetes.io/arch;kubernetes.io/os;topology.kubernetes.io/region;topology.kubernetes.io/zone;karpenter.sh/capacity-type;tensor-fusion.ai/gpu-arch;tensor-fusion.ai/gpu-instance-family;tensor-fusion.ai/gpu-instance-size
// Values of NodeRequirementKey.
const (
	NodeRequirementKeyInstanceType    NodeRequirementKey = "node.kubernetes.io/instance-type"
	NodeRequirementKeyArchitecture    NodeRequirementKey = "kubernetes.io/arch"
	NodeRequirementKeyGPUArchitecture NodeRequirementKey = "tensor-fusion.ai/gpu-arch"

	NodeRequirementKeyOS     NodeRequirementKey = "kubernetes.io/os"
	NodeRequirementKeyRegion NodeRequirementKey = "topology.kubernetes.io/region"
	NodeRequirementKeyZone   NodeRequirementKey = "topology.kubernetes.io/zone"

	// capacity-type is the charging method; it can be spot/preemptive or on-demand.
	NodeRequirementKeyCapacityType NodeRequirementKey = "karpenter.sh/capacity-type"

	NodeRequirementKeyInstanceFamily NodeRequirementKey = "tensor-fusion.ai/gpu-instance-family"
	NodeRequirementKeyInstanceSize   NodeRequirementKey = "tensor-fusion.ai/gpu-instance-size"
)
type NodeRollingUpdatePolicy ¶
// NodeRollingUpdatePolicy holds the rolling-update settings for nodes: an
// auto-update flag, batch percentage and interval, a maximum duration and an
// optional maintenance window.
type NodeRollingUpdatePolicy struct {
	// +kubebuilder:default=true
	// +optional
	AutoUpdate *bool `json:"autoUpdate,omitempty"`

	// Percentage value, validated to the range 0-100.
	// +kubebuilder:default=100
	// +kubebuilder:validation:Minimum=0
	// +kubebuilder:validation:Maximum=100
	BatchPercentage int32 `json:"batchPercentage,omitempty"`

	// +kubebuilder:default="10m"
	BatchInterval string `json:"batchInterval,omitempty"`

	// +optional
	// +kubebuilder:default="10m"
	MaxDuration string `json:"maxDuration,omitempty"`

	// +optional
	MaintenanceWindow MaintenanceWindow `json:"maintenanceWindow,omitempty"`
}
func (*NodeRollingUpdatePolicy) DeepCopy ¶
func (in *NodeRollingUpdatePolicy) DeepCopy() *NodeRollingUpdatePolicy
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeRollingUpdatePolicy.
func (*NodeRollingUpdatePolicy) DeepCopyInto ¶
func (in *NodeRollingUpdatePolicy) DeepCopyInto(out *NodeRollingUpdatePolicy)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type OSImageTypeEnum ¶
// OSImageTypeEnum is the string enum of OS image types; the declared values
// are Private, Public and System.
type OSImageTypeEnum string
+kubebuilder:validation:Enum=Private;Public;System
// Values of OSImageTypeEnum.
const (
	OSImageTypePrivate OSImageTypeEnum = "Private"
	OSImageTypePublic  OSImageTypeEnum = "Public"
	OSImageTypeSystem  OSImageTypeEnum = "System"
)
type ObservabilityConfig ¶
// ObservabilityConfig groups the optional monitor and alert configurations.
type ObservabilityConfig struct {
	// +optional
	Monitor *MonitorConfig `json:"monitor,omitempty"`

	// +optional
	Alert *AlertConfig `json:"alert,omitempty"`
}
func (*ObservabilityConfig) DeepCopy ¶
func (in *ObservabilityConfig) DeepCopy() *ObservabilityConfig
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ObservabilityConfig.
func (*ObservabilityConfig) DeepCopyInto ¶
func (in *ObservabilityConfig) DeepCopyInto(out *ObservabilityConfig)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type Oversubscription ¶
// Oversubscription holds the oversubscription percentages for VRAM expansion
// to host memory and host disk, and the TFlops oversell ratio.
type Oversubscription struct {
	// The percentage of host RAM appended to GPU VRAM; defaults to 50%.
	// +optional
	// +kubebuilder:default=50
	// +kubebuilder:validation:Minimum=0
	// +kubebuilder:validation:Maximum=100
	VRAMExpandToHostMem int32 `json:"vramExpandToHostMem,omitempty"`

	// The percentage of host disk appended to GPU VRAM; defaults to 70%.
	// +optional
	// +kubebuilder:default=70
	// +kubebuilder:validation:Minimum=0
	// +kubebuilder:validation:Maximum=100
	VRAMExpandToHostDisk int32 `json:"vramExpandToHostDisk,omitempty"`

	// The multiple of TFlops to oversell, as a percentage; defaults to 500%,
	// which indicates 5 times oversell.
	// +optional
	// +kubebuilder:default=500
	// +kubebuilder:validation:Minimum=100
	// +kubebuilder:validation:Maximum=100000
	TFlopsOversellRatio int32 `json:"tflopsOversellRatio,omitempty"`
}
func (*Oversubscription) DeepCopy ¶
func (in *Oversubscription) DeepCopy() *Oversubscription
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Oversubscription.
func (*Oversubscription) DeepCopyInto ¶
func (in *Oversubscription) DeepCopyInto(out *Oversubscription)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type PeriodicalBudget ¶
type PeriodicalBudget struct {
	// Budget values are strings; the defaults are 100 per day, 1000 per month
	// and 3000 per quarter.
	// +kubebuilder:default="100"
	BudgetPerDay string `json:"budgetPerDay,omitempty"`

	// +kubebuilder:default="1000"
	BudgetPerMonth string `json:"budgetPerMonth,omitempty"`

	// +kubebuilder:default="3000"
	BudgetPerQuarter string `json:"budgetPerQuarter,omitempty"`

	// +kubebuilder:default=AlertOnly
	BudgetExceedStrategy BudgetExceedStrategy `json:"budgetExceedStrategy,omitempty"`
}
PeriodicalBudget defines the budget constraints, in dollars.
func (*PeriodicalBudget) DeepCopy ¶
func (in *PeriodicalBudget) DeepCopy() *PeriodicalBudget
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PeriodicalBudget.
func (*PeriodicalBudget) DeepCopyInto ¶
func (in *PeriodicalBudget) DeepCopyInto(out *PeriodicalBudget)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type PlacementConfig ¶
// PlacementConfig holds the placement settings: the placement mode, whether
// local GPUs may be used, and optional GPU filters.
type PlacementConfig struct {
	// +kubebuilder:default=CompactFirst
	Mode PlacementMode `json:"mode"`

	// +kubebuilder:default=true
	// +optional
	AllowUsingLocalGPU *bool `json:"allowUsingLocalGPU,omitempty"` // If false, workloads will not be scheduled directly to GPU nodes with 'localGPU: true'.

	// +optional
	GPUFilters []GPUFilter `json:"gpuFilters,omitempty"`
}
func (*PlacementConfig) DeepCopy ¶
func (in *PlacementConfig) DeepCopy() *PlacementConfig
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PlacementConfig.
func (*PlacementConfig) DeepCopyInto ¶
func (in *PlacementConfig) DeepCopyInto(out *PlacementConfig)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type PlacementMode ¶
// PlacementMode is the string enum used by PlacementConfig.Mode; the declared
// values are CompactFirst and LowLoadFirst.
type PlacementMode string
+kubebuilder:validation:Enum=CompactFirst;LowLoadFirst
// Values of PlacementMode.
const (
	// Defaults to CompactFirst, for cost saving and energy saving.
	PlacementModeCompactFirst PlacementMode = "CompactFirst"

	// In some cases, use LowLoadFirst for balance and fairness.
	PlacementModeLowLoadFirst PlacementMode = "LowLoadFirst"
)
type PoolComponentStatus ¶
// PoolComponentStatus records, for each of the worker, hypervisor and client
// components, a version string, a config-synced flag and an update progress
// value.
//
// NOTE(review): HyperVisorUpdateProgress is cased differently from the
// sibling Hypervisor* fields; renaming it would break callers, so it is left
// unchanged here.
type PoolComponentStatus struct {
	WorkerVersion        string `json:"worker,omitempty"`
	WorkerConfigSynced   bool   `json:"workerConfigSynced,omitempty"`
	WorkerUpdateProgress int32  `json:"workerUpdateProgress,omitempty"`

	HypervisorVersion        string `json:"hypervisor,omitempty"`
	HypervisorConfigSynced   bool   `json:"hypervisorConfigSynced,omitempty"`
	HyperVisorUpdateProgress int32  `json:"hypervisorUpdateProgress,omitempty"`

	ClientVersion        string `json:"client,omitempty"`
	ClientConfigSynced   bool   `json:"clientConfigSynced,omitempty"`
	ClientUpdateProgress int32  `json:"clientUpdateProgress,omitempty"`
}
func (*PoolComponentStatus) DeepCopy ¶
func (in *PoolComponentStatus) DeepCopy() *PoolComponentStatus
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PoolComponentStatus.
func (*PoolComponentStatus) DeepCopyInto ¶
func (in *PoolComponentStatus) DeepCopyInto(out *PoolComponentStatus)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type PoolProvisioningStatus ¶
// PoolProvisioningStatus holds counts of initializing, terminating and
// available nodes.
type PoolProvisioningStatus struct {
	InitializingNodes int32 `json:"initializingNodes,omitempty"`
	TerminatingNodes  int32 `json:"terminatingNodes,omitempty"`
	AvailableNodes    int32 `json:"availableNodes,omitempty"`
}
func (*PoolProvisioningStatus) DeepCopy ¶
func (in *PoolProvisioningStatus) DeepCopy() *PoolProvisioningStatus
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PoolProvisioningStatus.
func (*PoolProvisioningStatus) DeepCopyInto ¶
func (in *PoolProvisioningStatus) DeepCopyInto(out *PoolProvisioningStatus)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type ProvisioningMode ¶
type ProvisioningMode string
+kubebuilder:validation:Enum=Provisioned;AutoSelect
const ( ProvisioningModeProvisioned ProvisioningMode = "Provisioned" ProvisioningModeAutoSelect ProvisioningMode = "AutoSelect" )
type QosConfig ¶
type QosConfig struct { Definitions []QosDefinition `json:"definitions,omitempty"` DefaultQoS QoSLevel `json:"defaultQoS,omitempty"` Pricing []QosPricing `json:"pricing,omitempty"` }
QosConfig defines the available QoS levels and their pricing.
func (*QosConfig) DeepCopy ¶
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new QosConfig.
func (*QosConfig) DeepCopyInto ¶
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type QosDefinition ¶
type QosDefinition struct { Name QoSLevel `json:"name,omitempty"` Description string `json:"description,omitempty"` Priority int `json:"priority,omitempty"` // Range from 1-100, reflects the scheduling priority when GPU is full and tasks are in the queue. }
func (*QosDefinition) DeepCopy ¶
func (in *QosDefinition) DeepCopy() *QosDefinition
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new QosDefinition.
func (*QosDefinition) DeepCopyInto ¶
func (in *QosDefinition) DeepCopyInto(out *QosDefinition)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type QosPricing ¶
type QosPricing struct { Qos QoSLevel `json:"qos,omitempty"` Requests GPUResourcePricingUnit `json:"requests,omitempty"` // Default requests and limitsOverRequests are same, indicates normal on-demand serverless GPU usage, in hands-on lab low QoS case, limitsOverRequests should be cheaper, for example Low QoS, ratio should be 0.5 // +kubebuilder:default="1" LimitsOverRequestsChargingRatio string `json:"limitsOverRequests,omitempty"` }
func (*QosPricing) DeepCopy ¶
func (in *QosPricing) DeepCopy() *QosPricing
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new QosPricing.
func (*QosPricing) DeepCopyInto ¶
func (in *QosPricing) DeepCopyInto(out *QosPricing)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type ReBalanceThreshold ¶
type ReBalanceThreshold struct {
MatchAny runtime.RawExtension `json:"matchAny,omitempty"`
}
func (*ReBalanceThreshold) DeepCopy ¶
func (in *ReBalanceThreshold) DeepCopy() *ReBalanceThreshold
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ReBalanceThreshold.
func (*ReBalanceThreshold) DeepCopyInto ¶
func (in *ReBalanceThreshold) DeepCopyInto(out *ReBalanceThreshold)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type ReBalancerConfig ¶
type ReBalancerConfig struct { Enable *bool `json:"enable,omitempty"` Interval string `json:"interval,omitempty"` ReBalanceCoolDownTime string `json:"reBalanceCoolDownTime,omitempty"` Threshold ReBalanceThreshold `json:"threshold,omitempty"` }
ReBalancerConfig avoids hot GPU devices and continuously balances the workload. It is implemented by triggering a simulated scheduling run and advising the scheduler of better GPU nodes.
func (*ReBalancerConfig) DeepCopy ¶
func (in *ReBalancerConfig) DeepCopy() *ReBalancerConfig
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ReBalancerConfig.
func (*ReBalancerConfig) DeepCopyInto ¶
func (in *ReBalancerConfig) DeepCopyInto(out *ReBalancerConfig)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type RemoteWriteConfig ¶
type RemoteWriteConfig struct { Connection DataPipelineResultRemoteWriteConfig `json:"connection,omitempty"` Metrics []string `json:"metrics,omitempty"` // List of metrics to remote write. }
RemoteWriteConfig represents the configuration for remote write.
func (*RemoteWriteConfig) DeepCopy ¶
func (in *RemoteWriteConfig) DeepCopy() *RemoteWriteConfig
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RemoteWriteConfig.
func (*RemoteWriteConfig) DeepCopyInto ¶
func (in *RemoteWriteConfig) DeepCopyInto(out *RemoteWriteConfig)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type Requirement ¶
type Requirement struct { Key NodeRequirementKey `json:"key,omitempty"` // +kubebuilder:default="In" // +kubebuilder:validation:Enum=In;Exists;DoesNotExist;Gt;Lt Operator corev1.NodeSelectorOperator `json:"operator,omitempty"` Values []string `json:"values,omitempty"` }
func (*Requirement) DeepCopy ¶
func (in *Requirement) DeepCopy() *Requirement
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Requirement.
func (*Requirement) DeepCopyInto ¶
func (in *Requirement) DeepCopyInto(out *Requirement)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type Resource ¶
type Resource struct { Tflops resource.Quantity `json:"tflops"` Vram resource.Quantity `json:"vram"` }
func (*Resource) DeepCopy ¶
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Resource.
func (*Resource) DeepCopyInto ¶
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type Resources ¶
func (*Resources) DeepCopy ¶
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Resources.
func (*Resources) DeepCopyInto ¶
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type ScaleToZero ¶
type ScaleToZero struct { AutoFreeze []AutoFreeze `json:"autoFreeze,omitempty"` IntelligenceWarmup SmartSchedulerModelInput `json:"intelligenceWarmup,omitempty"` }
func (*ScaleToZero) DeepCopy ¶
func (in *ScaleToZero) DeepCopy() *ScaleToZero
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ScaleToZero.
func (*ScaleToZero) DeepCopyInto ¶
func (in *ScaleToZero) DeepCopyInto(out *ScaleToZero)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type SchedulingConfigTemplate ¶
type SchedulingConfigTemplate struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata,omitempty"` Spec SchedulingConfigTemplateSpec `json:"spec,omitempty"` Status SchedulingConfigTemplateStatus `json:"status,omitempty"` }
+kubebuilder:object:root=true +kubebuilder:subresource:status +kubebuilder:resource:scope=Cluster +kubebuilder:printcolumn:name="Mode",type="string",JSONPath=".spec.placement.mode" +kubebuilder:printcolumn:name="Allow Local GPU",type="string",JSONPath=".spec.placement.allowLocalGPU" SchedulingConfigTemplate is the Schema for the schedulingconfigtemplates API.
func (*SchedulingConfigTemplate) DeepCopy ¶
func (in *SchedulingConfigTemplate) DeepCopy() *SchedulingConfigTemplate
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulingConfigTemplate.
func (*SchedulingConfigTemplate) DeepCopyInto ¶
func (in *SchedulingConfigTemplate) DeepCopyInto(out *SchedulingConfigTemplate)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*SchedulingConfigTemplate) DeepCopyObject ¶
func (in *SchedulingConfigTemplate) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type SchedulingConfigTemplateList ¶
type SchedulingConfigTemplateList struct { metav1.TypeMeta `json:",inline"` metav1.ListMeta `json:"metadata,omitempty"` Items []SchedulingConfigTemplate `json:"items"` }
SchedulingConfigTemplateList contains a list of SchedulingConfigTemplate.
func (*SchedulingConfigTemplateList) DeepCopy ¶
func (in *SchedulingConfigTemplateList) DeepCopy() *SchedulingConfigTemplateList
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulingConfigTemplateList.
func (*SchedulingConfigTemplateList) DeepCopyInto ¶
func (in *SchedulingConfigTemplateList) DeepCopyInto(out *SchedulingConfigTemplateList)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*SchedulingConfigTemplateList) DeepCopyObject ¶
func (in *SchedulingConfigTemplateList) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type SchedulingConfigTemplateSpec ¶
type SchedulingConfigTemplateSpec struct { // place the client or worker to best matched nodes Placement PlacementConfig `json:"placement"` // scale the workload based on the usage and traffic // +optional AutoScaling *AutoScalingConfig `json:"autoScaling,omitempty"` // avoid hot GPU devices and continuously balance the workload // implemented by trigger a simulation scheduling and advise better GPU nodes for scheduler // +optional ReBalancer *ReBalancerConfig `json:"reBalancer,omitempty"` // single GPU device multi-process queuing and fair scheduling with QoS constraint // +optional Hypervisor *HypervisorScheduling `json:"hypervisor,omitempty"` }
SchedulingConfigTemplateSpec places the workload on the right nodes and scales it smartly.
func (*SchedulingConfigTemplateSpec) DeepCopy ¶
func (in *SchedulingConfigTemplateSpec) DeepCopy() *SchedulingConfigTemplateSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulingConfigTemplateSpec.
func (*SchedulingConfigTemplateSpec) DeepCopyInto ¶
func (in *SchedulingConfigTemplateSpec) DeepCopyInto(out *SchedulingConfigTemplateSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type SchedulingConfigTemplateStatus ¶
type SchedulingConfigTemplateStatus struct { }
SchedulingConfigTemplateStatus defines the observed state of SchedulingConfigTemplate.
func (*SchedulingConfigTemplateStatus) DeepCopy ¶
func (in *SchedulingConfigTemplateStatus) DeepCopy() *SchedulingConfigTemplateStatus
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulingConfigTemplateStatus.
func (*SchedulingConfigTemplateStatus) DeepCopyInto ¶
func (in *SchedulingConfigTemplateStatus) DeepCopyInto(out *SchedulingConfigTemplateStatus)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type SmartSchedulerModelInput ¶
type SmartSchedulerModelInput struct { Enable *bool `json:"enable,omitempty"` Model string `json:"model,omitempty"` HistoryDataPeriod string `json:"historyDataPeriod,omitempty"` PredictionPeriod string `json:"predictionPeriod,omitempty"` }
func (*SmartSchedulerModelInput) DeepCopy ¶
func (in *SmartSchedulerModelInput) DeepCopy() *SmartSchedulerModelInput
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SmartSchedulerModelInput.
func (*SmartSchedulerModelInput) DeepCopyInto ¶
func (in *SmartSchedulerModelInput) DeepCopyInto(out *SmartSchedulerModelInput)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type StorageVendorConfig ¶
type StorageVendorConfig struct { Mode string `json:"mode,omitempty"` // Mode of the storage vendor (e.g., cloudnative-pg, timescale-db, RDS for PG). Image string `json:"image,omitempty"` // Image for the storage vendor (default to timescale). // +optional InstallCloudNativePGOperator *bool `json:"installCloudNativePGOperator,omitempty"` // Whether to install CloudNative-PG operator. StorageClass string `json:"storageClass,omitempty"` // Storage class for the storage vendor. PGExtensions []string `json:"pgExtensions,omitempty"` // List of PostgreSQL extensions to install. PGClusterTemplate runtime.RawExtension `json:"pgClusterTemplate,omitempty"` // Extra spec for the PostgreSQL cluster template. }
StorageVendorConfig defines the Postgres database, with extensions, used for time-series storage as well as for resource aggregation results, system events, diagnostics reports, etc.
func (*StorageVendorConfig) DeepCopy ¶
func (in *StorageVendorConfig) DeepCopy() *StorageVendorConfig
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new StorageVendorConfig.
func (*StorageVendorConfig) DeepCopyInto ¶
func (in *StorageVendorConfig) DeepCopyInto(out *StorageVendorConfig)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type Taint ¶
type Taint struct { // +kubebuilder:default=NoSchedule // +kubebuilder:validation:Enum=NoSchedule;NoExecute;PreferNoSchedule Effect corev1.TaintEffect `json:"effect,omitempty"` Key string `json:"key,omitempty"` Value string `json:"value,omitempty"` }
func (*Taint) DeepCopy ¶
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Taint.
func (*Taint) DeepCopyInto ¶
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type TensorFusionCluster ¶
type TensorFusionCluster struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata,omitempty"` Spec TensorFusionClusterSpec `json:"spec,omitempty"` Status TensorFusionClusterStatus `json:"status,omitempty"` }
+kubebuilder:printcolumn:name="Total Tflops",type="string",JSONPath=".status.totalTFlops" +kubebuilder:printcolumn:name="Total VRAM",type="string",JSONPath=".status.totalVRAM" +kubebuilder:printcolumn:name="Available Tflops",type="string",JSONPath=".status.availableTFlops" +kubebuilder:printcolumn:name="Available VRAM",type="string",JSONPath=".status.availableVRAM" TensorFusionCluster is the Schema for the tensorfusionclusters API.
func (*TensorFusionCluster) DeepCopy ¶
func (in *TensorFusionCluster) DeepCopy() *TensorFusionCluster
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TensorFusionCluster.
func (*TensorFusionCluster) DeepCopyInto ¶
func (in *TensorFusionCluster) DeepCopyInto(out *TensorFusionCluster)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*TensorFusionCluster) DeepCopyObject ¶
func (in *TensorFusionCluster) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
func (*TensorFusionCluster) RefreshStatus ¶
func (tfc *TensorFusionCluster) RefreshStatus(ownedPools []GPUPool)
func (*TensorFusionCluster) SetAsPending ¶
func (tfc *TensorFusionCluster) SetAsPending()
func (*TensorFusionCluster) SetAsReady ¶
func (tfc *TensorFusionCluster) SetAsReady(conditions ...metav1.Condition) bool
func (*TensorFusionCluster) SetAsUnknown ¶
func (tfc *TensorFusionCluster) SetAsUnknown(err error) bool
func (*TensorFusionCluster) SetAsUpdating ¶
func (tfc *TensorFusionCluster) SetAsUpdating(conditions ...metav1.Condition) bool
type TensorFusionClusterList ¶
type TensorFusionClusterList struct { metav1.TypeMeta `json:",inline"` metav1.ListMeta `json:"metadata,omitempty"` Items []TensorFusionCluster `json:"items"` }
TensorFusionClusterList contains a list of TensorFusionCluster.
func (*TensorFusionClusterList) DeepCopy ¶
func (in *TensorFusionClusterList) DeepCopy() *TensorFusionClusterList
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TensorFusionClusterList.
func (*TensorFusionClusterList) DeepCopyInto ¶
func (in *TensorFusionClusterList) DeepCopyInto(out *TensorFusionClusterList)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*TensorFusionClusterList) DeepCopyObject ¶
func (in *TensorFusionClusterList) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type TensorFusionClusterPhase ¶
type TensorFusionClusterPhase string
+kubebuilder:validation:Enum=Pending;Running;Updating;Destroying;Unknown TensorFusionClusterPhase represents the phase of the TensorFusionCluster resource.
type TensorFusionClusterSpec ¶
type TensorFusionClusterSpec struct { GPUPools []GPUPoolDefinition `json:"gpuPools,omitempty"` // +optional ComputingVendor *ComputingVendorConfig `json:"computingVendor,omitempty"` // +optional StorageVendor *StorageVendorConfig `json:"storageVendor,omitempty"` // +optional DataPipelines *DataPipelinesConfig `json:"dataPipelines,omitempty"` }
TensorFusionClusterSpec defines the desired state of TensorFusionCluster.
func (*TensorFusionClusterSpec) DeepCopy ¶
func (in *TensorFusionClusterSpec) DeepCopy() *TensorFusionClusterSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TensorFusionClusterSpec.
func (*TensorFusionClusterSpec) DeepCopyInto ¶
func (in *TensorFusionClusterSpec) DeepCopyInto(out *TensorFusionClusterSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type TensorFusionClusterStatus ¶
type TensorFusionClusterStatus struct { // +kubebuilder:default=Pending Phase TensorFusionClusterPhase `json:"phase,omitempty"` Conditions []metav1.Condition `json:"conditions,omitempty"` TotalPools int32 `json:"totalPools"` TotalNodes int32 `json:"totalNodes"` TotalGPUs int32 `json:"totalGPUs"` TotalTFlops resource.Quantity `json:"totalTFlops"` TotalVRAM resource.Quantity `json:"totalVRAM"` VirtualTFlops resource.Quantity `json:"virtualTFlops"` VirtualVRAM resource.Quantity `json:"virtualVRAM"` AvailableTFlops resource.Quantity `json:"availableTFlops"` AvailableVRAM resource.Quantity `json:"availableVRAM"` // +optional VirtualAvailableTFlops *resource.Quantity `json:"virtualAvailableTFlops,omitempty"` // +optional VirtualAvailableVRAM *resource.Quantity `json:"virtualAvailableVRAM,omitempty"` // +optional ReadyGPUPools []string `json:"readyGPUPools"` // +optional NotReadyGPUPools []string `json:"notReadyGPUPools"` // +kubebuilder:default=0 // RetryCount int64 `json:"retryCount"` // TODO: calculated every 1h/1d/1w average UtilizedTFlopsPercent string `json:"utilizedTFlopsPercent,omitempty"` UtilizedVRAMPercent string `json:"utilizedVRAMPercent,omitempty"` // TODO: updated with interval AllocatedTFlopsPercent string `json:"allocatedTFlopsPercent,omitempty"` AllocatedVRAMPercent string `json:"allocatedVRAMPercent,omitempty"` // TODO: aggregated with interval SavedCostsPerMonth string `json:"savedCostsPerMonth,omitempty"` PotentialSavingsPerMonth string `json:"potentialSavingsPerMonth,omitempty"` CloudVendorConfigHash string `json:"cloudVendorConfigHash,omitempty"` }
TensorFusionClusterStatus defines the observed state of TensorFusionCluster.
func (*TensorFusionClusterStatus) DeepCopy ¶
func (in *TensorFusionClusterStatus) DeepCopy() *TensorFusionClusterStatus
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TensorFusionClusterStatus.
func (*TensorFusionClusterStatus) DeepCopyInto ¶
func (in *TensorFusionClusterStatus) DeepCopyInto(out *TensorFusionClusterStatus)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type TensorFusionConnection ¶
type TensorFusionConnection struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata,omitempty"` Spec TensorFusionConnectionSpec `json:"spec,omitempty"` Status TensorFusionConnectionStatus `json:"status,omitempty"` }
TensorFusionConnection is the Schema for the tensorfusionconnections API.
func (*TensorFusionConnection) DeepCopy ¶
func (in *TensorFusionConnection) DeepCopy() *TensorFusionConnection
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TensorFusionConnection.
func (*TensorFusionConnection) DeepCopyInto ¶
func (in *TensorFusionConnection) DeepCopyInto(out *TensorFusionConnection)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*TensorFusionConnection) DeepCopyObject ¶
func (in *TensorFusionConnection) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type TensorFusionConnectionList ¶
type TensorFusionConnectionList struct { metav1.TypeMeta `json:",inline"` metav1.ListMeta `json:"metadata,omitempty"` Items []TensorFusionConnection `json:"items"` }
TensorFusionConnectionList contains a list of TensorFusionConnection.
func (*TensorFusionConnectionList) DeepCopy ¶
func (in *TensorFusionConnectionList) DeepCopy() *TensorFusionConnectionList
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TensorFusionConnectionList.
func (*TensorFusionConnectionList) DeepCopyInto ¶
func (in *TensorFusionConnectionList) DeepCopyInto(out *TensorFusionConnectionList)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*TensorFusionConnectionList) DeepCopyObject ¶
func (in *TensorFusionConnectionList) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type TensorFusionConnectionSpec ¶
type TensorFusionConnectionSpec struct {
WorkloadName string `json:"workloadName"`
}
TensorFusionConnectionSpec defines the desired state of TensorFusionConnection.
func (*TensorFusionConnectionSpec) DeepCopy ¶
func (in *TensorFusionConnectionSpec) DeepCopy() *TensorFusionConnectionSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TensorFusionConnectionSpec.
func (*TensorFusionConnectionSpec) DeepCopyInto ¶
func (in *TensorFusionConnectionSpec) DeepCopyInto(out *TensorFusionConnectionSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type TensorFusionConnectionStatus ¶
type TensorFusionConnectionStatus struct { Phase WorkerPhase `json:"phase"` ConnectionURL string `json:"connectionURL"` WorkerName string `json:"workerName"` }
TensorFusionConnectionStatus defines the observed state of TensorFusionConnection.
func (*TensorFusionConnectionStatus) DeepCopy ¶
func (in *TensorFusionConnectionStatus) DeepCopy() *TensorFusionConnectionStatus
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TensorFusionConnectionStatus.
func (*TensorFusionConnectionStatus) DeepCopyInto ¶
func (in *TensorFusionConnectionStatus) DeepCopyInto(out *TensorFusionConnectionStatus)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type TensorFusionGPUNodePhase ¶
type TensorFusionGPUNodePhase string
+kubebuilder:validation:Enum=Pending;Provisioning;Migrating;Running;Succeeded;Failed;Unknown;Destroying
const ( TensorFusionGPUNodePhasePending TensorFusionGPUNodePhase = constants.PhasePending TensorFusionGPUNodePhaseMigrating TensorFusionGPUNodePhase = constants.PhaseMigrating TensorFusionGPUNodePhaseRunning TensorFusionGPUNodePhase = constants.PhaseRunning TensorFusionGPUNodePhaseSucceeded TensorFusionGPUNodePhase = constants.PhaseSucceeded TensorFusionGPUNodePhaseFailed TensorFusionGPUNodePhase = constants.PhaseFailed TensorFusionGPUNodePhaseUnknown TensorFusionGPUNodePhase = constants.PhaseUnknown TensorFusionGPUNodePhaseDestroying TensorFusionGPUNodePhase = constants.PhaseDestroying )
type TensorFusionGPUPhase ¶
type TensorFusionGPUPhase string
+kubebuilder:validation:Enum=Pending;Provisioning;Running;Unknown;Destroying;Migrating
const ( TensorFusionGPUPhasePending TensorFusionGPUPhase = constants.PhasePending TensorFusionGPUPhaseUpdating TensorFusionGPUPhase = constants.PhaseUpdating TensorFusionGPUPhaseRunning TensorFusionGPUPhase = constants.PhaseRunning TensorFusionGPUPhaseUnknown TensorFusionGPUPhase = constants.PhaseUnknown TensorFusionGPUPhaseDestroying TensorFusionGPUPhase = constants.PhaseDestroying TensorFusionGPUPhaseMigrating TensorFusionGPUPhase = constants.PhaseMigrating )
type TensorFusionPoolPhase ¶
type TensorFusionPoolPhase string
+kubebuilder:validation:Enum=Pending;Running;Updating;Destroying;Unknown
type TensorFusionWorkload ¶
type TensorFusionWorkload struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata,omitempty"` Spec WorkloadProfileSpec `json:"spec,omitempty"` Status TensorFusionWorkloadStatus `json:"status,omitempty"` }
TensorFusionWorkload is the Schema for the tensorfusionworkloads API.
func (*TensorFusionWorkload) DeepCopy ¶
func (in *TensorFusionWorkload) DeepCopy() *TensorFusionWorkload
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TensorFusionWorkload.
func (*TensorFusionWorkload) DeepCopyInto ¶
func (in *TensorFusionWorkload) DeepCopyInto(out *TensorFusionWorkload)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*TensorFusionWorkload) DeepCopyObject ¶
func (in *TensorFusionWorkload) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type TensorFusionWorkloadList ¶
type TensorFusionWorkloadList struct { metav1.TypeMeta `json:",inline"` metav1.ListMeta `json:"metadata,omitempty"` Items []TensorFusionWorkload `json:"items"` }
TensorFusionWorkloadList contains a list of TensorFusionWorkload.
func (*TensorFusionWorkloadList) DeepCopy ¶
func (in *TensorFusionWorkloadList) DeepCopy() *TensorFusionWorkloadList
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TensorFusionWorkloadList.
func (*TensorFusionWorkloadList) DeepCopyInto ¶
func (in *TensorFusionWorkloadList) DeepCopyInto(out *TensorFusionWorkloadList)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*TensorFusionWorkloadList) DeepCopyObject ¶
func (in *TensorFusionWorkloadList) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type TensorFusionWorkloadStatus ¶
type TensorFusionWorkloadStatus struct { // replicas is the number of Pods created by the Workload controller. Replicas int32 `json:"replicas"` // readyReplicas is the number of pods created for this Workload with a Ready Condition. ReadyReplicas int32 `json:"readyReplicas,omitempty"` WorkerStatuses []WorkerStatus `json:"workerStatuses,omitempty"` PodTemplateHash string `json:"podTemplateHash,omitempty"` }
TensorFusionWorkloadStatus defines the observed state of TensorFusionWorkload.
func (*TensorFusionWorkloadStatus) DeepCopy ¶
func (in *TensorFusionWorkloadStatus) DeepCopy() *TensorFusionWorkloadStatus
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TensorFusionWorkloadStatus.
func (*TensorFusionWorkloadStatus) DeepCopyInto ¶
func (in *TensorFusionWorkloadStatus) DeepCopyInto(out *TensorFusionWorkloadStatus)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type WorkerConfig ¶
type WorkerConfig struct { // +optional PodTemplate *runtime.RawExtension `json:"podTemplate,omitempty"` }
func (*WorkerConfig) DeepCopy ¶
func (in *WorkerConfig) DeepCopy() *WorkerConfig
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new WorkerConfig.
func (*WorkerConfig) DeepCopyInto ¶
func (in *WorkerConfig) DeepCopyInto(out *WorkerConfig)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type WorkerPhase ¶
type WorkerPhase string
const ( WorkerPending WorkerPhase = "Pending" WorkerRunning WorkerPhase = "Running" WorkerFailed WorkerPhase = "Failed" )
type WorkerStatus ¶
type WorkerStatus struct { WorkerPhase WorkerPhase `json:"workerPhase"` WorkerName string `json:"workerName"` NodeSelector map[string]string `json:"nodeSelector,omitempty"` // +optional WorkerIp string `json:"workerIp,omitempty"` // +optional WorkerPort int `json:"workerPort,omitempty"` // +optional ResourceVersion string `json:"resourceVersion,omitempty"` }
func (*WorkerStatus) DeepCopy ¶
func (in *WorkerStatus) DeepCopy() *WorkerStatus
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new WorkerStatus.
func (*WorkerStatus) DeepCopyInto ¶
func (in *WorkerStatus) DeepCopyInto(out *WorkerStatus)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type WorkloadProfile ¶ added in v1.23.7
type WorkloadProfile struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata,omitempty"` Spec WorkloadProfileSpec `json:"spec,omitempty"` Status WorkloadProfileStatus `json:"status,omitempty"` }
WorkloadProfile is the Schema for the workloadprofiles API.
func (*WorkloadProfile) DeepCopy ¶ added in v1.23.7
func (in *WorkloadProfile) DeepCopy() *WorkloadProfile
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new WorkloadProfile.
func (*WorkloadProfile) DeepCopyInto ¶ added in v1.23.7
func (in *WorkloadProfile) DeepCopyInto(out *WorkloadProfile)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*WorkloadProfile) DeepCopyObject ¶ added in v1.23.7
func (in *WorkloadProfile) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type WorkloadProfileList ¶ added in v1.23.7
type WorkloadProfileList struct { metav1.TypeMeta `json:",inline"` metav1.ListMeta `json:"metadata,omitempty"` Items []WorkloadProfile `json:"items"` }
WorkloadProfileList contains a list of WorkloadProfile.
func (*WorkloadProfileList) DeepCopy ¶ added in v1.23.7
func (in *WorkloadProfileList) DeepCopy() *WorkloadProfileList
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new WorkloadProfileList.
func (*WorkloadProfileList) DeepCopyInto ¶ added in v1.23.7
func (in *WorkloadProfileList) DeepCopyInto(out *WorkloadProfileList)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*WorkloadProfileList) DeepCopyObject ¶ added in v1.23.7
func (in *WorkloadProfileList) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type WorkloadProfileSpec ¶ added in v1.23.7
type WorkloadProfileSpec struct { // +optional Replicas *int32 `json:"replicas,omitempty"` // +optional PoolName string `json:"poolName,omitempty"` // +optional Resources Resources `json:"resources,omitempty"` // +optional // Qos defines the quality of service level for the client. Qos QoSLevel `json:"qos,omitempty"` // +optional // Schedule the workload to the same GPU server that runs the vGPU worker for best performance; defaults to false. IsLocalGPU bool `json:"isLocalGPU,omitempty"` // +optional // TODO, not implemented // The number of GPUs to be used by the workload; defaults to 1. GPUCount int `json:"gpuCount,omitempty"` // +optional // TODO, not implemented // This mode is only available when `is-local-gpu` is set to true. In this mode, TensorFusion also injects the vGPU worker into an init container to achieve the best performance; the trade-off is that the user might bypass the vGPU worker and use the physical GPU directly. NoStandaloneWorkerMode bool `json:"noStandaloneWorkerMode,omitempty"` // +optional // AutoScalingConfig configured here will override the Pool's schedulingConfig. // This field cannot be fully supported in annotations; if the user wants to enable auto-scaling via annotations, // they can set tensor-fusion.ai/auto-limits|requests|replicas: 'true'. AutoScalingConfig AutoScalingConfig `json:"autoScalingConfig,omitempty"` }
WorkloadProfileSpec defines the desired state of WorkloadProfile.
func (*WorkloadProfileSpec) DeepCopy ¶ added in v1.23.7
func (in *WorkloadProfileSpec) DeepCopy() *WorkloadProfileSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new WorkloadProfileSpec.
func (*WorkloadProfileSpec) DeepCopyInto ¶ added in v1.23.7
func (in *WorkloadProfileSpec) DeepCopyInto(out *WorkloadProfileSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type WorkloadProfileStatus ¶ added in v1.23.7
type WorkloadProfileStatus struct { }
WorkloadProfileStatus defines the observed state of WorkloadProfile.
func (*WorkloadProfileStatus) DeepCopy ¶ added in v1.23.7
func (in *WorkloadProfileStatus) DeepCopy() *WorkloadProfileStatus
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new WorkloadProfileStatus.
func (*WorkloadProfileStatus) DeepCopyInto ¶ added in v1.23.7
func (in *WorkloadProfileStatus) DeepCopyInto(out *WorkloadProfileStatus)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
Source Files
¶
- base_types.go
- gpu_types.go
- gpunode_funcs.go
- gpunode_types.go
- gpunodeclass_types.go
- gpupool_types.go
- groupversion_info.go
- schedulingconfigtemplate_types.go
- tensorfusioncluster_funcs.go
- tensorfusioncluster_types.go
- tensorfusionconnection_types.go
- tensorfusionworkload_types.go
- workloadprofile_types.go
- zz_generated.deepcopy.go