Documentation
¶
Index ¶
- func Node(options NodeOptions) *corev1.Node
- type FakeNodeClaimReconciler
- type GPUNodeClassReconciler
- type GPUNodeReconciler
- type GPUPoolCompactionReconciler
- type GPUPoolReconciler
- type GPUReconciler
- type GPUResourceQuotaReconciler
- type NodeOptions
- type NodeReconciler
- type PodReconciler
- type SchedulingConfigTemplateReconciler
- type TensorFusionClusterReconciler
- type TensorFusionConnectionReconciler
- type TensorFusionWorkloadReconciler
- type WorkloadProfileReconciler
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
Types ¶
type FakeNodeClaimReconciler ¶ added in v1.40.0
type FakeNodeClaimReconciler struct {
Scheme *runtime.Scheme
// contains filtered or unexported fields
}
func (*FakeNodeClaimReconciler) SetupWithManager ¶ added in v1.40.0
func (r *FakeNodeClaimReconciler) SetupWithManager(mgr ctrl.Manager) error
type GPUNodeClassReconciler ¶
GPUNodeClassReconciler reconciles a GPUNodeClass object
func (*GPUNodeClassReconciler) Reconcile ¶
func (r *GPUNodeClassReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error)
Reconcile GPU node classes
func (*GPUNodeClassReconciler) SetupWithManager ¶
func (r *GPUNodeClassReconciler) SetupWithManager(mgr ctrl.Manager) error
SetupWithManager sets up the controller with the Manager.
type GPUNodeReconciler ¶
type GPUNodeReconciler struct {
client.Client
Scheme *runtime.Scheme
Recorder record.EventRecorder
GlobalConfig *config.GlobalConfig
}
GPUNodeReconciler reconciles a GPUNode object
func (*GPUNodeReconciler) SetupWithManager ¶
func (r *GPUNodeReconciler) SetupWithManager(mgr ctrl.Manager) error
SetupWithManager sets up the controller with the Manager.
type GPUPoolCompactionReconciler ¶
type GPUPoolCompactionReconciler struct {
client.Client
Scheme *runtime.Scheme
Recorder record.EventRecorder
Allocator *gpuallocator.GpuAllocator
}
GPUPoolCompactionReconciler reconciles a GPUPool object
func (*GPUPoolCompactionReconciler) SetupWithManager ¶
func (r *GPUPoolCompactionReconciler) SetupWithManager(mgr ctrl.Manager) error
SetupWithManager sets up the controller with the Manager.
type GPUPoolReconciler ¶
type GPUPoolReconciler struct {
client.Client
LastProcessedItems sync.Map
Scheme *runtime.Scheme
Recorder record.EventRecorder
}
GPUPoolReconciler reconciles a GPUPool object
func (*GPUPoolReconciler) SetupWithManager ¶
func (r *GPUPoolReconciler) SetupWithManager(mgr ctrl.Manager) error
SetupWithManager sets up the controller with the Manager.
type GPUReconciler ¶
GPUReconciler reconciles a GPU object
func (*GPUReconciler) Reconcile ¶
Reconcile is part of the main kubernetes reconciliation loop which aims to move the current state of the cluster closer to the desired state.
func (*GPUReconciler) SetupWithManager ¶
SetupWithManager sets up the controller with the Manager.
type GPUResourceQuotaReconciler ¶ added in v1.34.6
type GPUResourceQuotaReconciler struct {
client.Client
Scheme *runtime.Scheme
Recorder record.EventRecorder
QuotaStore *quota.QuotaStore
}
GPUResourceQuotaReconciler reconciles a GPUResourceQuota object
func (*GPUResourceQuotaReconciler) Reconcile ¶ added in v1.34.6
func (r *GPUResourceQuotaReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error)
Reconcile is part of the main kubernetes reconciliation loop which aims to move the current state of the cluster closer to the desired state.
func (*GPUResourceQuotaReconciler) SetupWithManager ¶ added in v1.34.6
func (r *GPUResourceQuotaReconciler) SetupWithManager(mgr ctrl.Manager) error
SetupWithManager sets up the controller with the Manager.
type NodeOptions ¶ added in v1.40.0
type NodeOptions struct {
metav1.ObjectMeta
ReadyStatus corev1.ConditionStatus
ReadyReason string
Conditions []corev1.NodeCondition
Unschedulable bool
ProviderID string
Taints []corev1.Taint
Allocatable corev1.ResourceList
Capacity corev1.ResourceList
OwnerReference []metav1.OwnerReference
}
type NodeReconciler ¶
NodeReconciler reconciles a Node object
func (*NodeReconciler) Reconcile ¶
This reconcile loop only takes effect in nodeSelector mode; in AutoProvision mode, the GPUNode manages the K8S Node rather than the reverse.
func (*NodeReconciler) SetupWithManager ¶
func (r *NodeReconciler) SetupWithManager(mgr ctrl.Manager) error
SetupWithManager sets up the controller with the Manager.
type PodReconciler ¶
type PodReconciler struct {
client.Client
Scheme *runtime.Scheme
Allocator *gpuallocator.GpuAllocator
PortAllocator *portallocator.PortAllocator
}
PodReconciler reconciles a Pod object
func (*PodReconciler) Reconcile ¶
Add a GPU connection for Pods using GPU. The TensorFusion connection has to be created here because the pod UID is not available in the MutatingWebhook.
func (*PodReconciler) SetupWithManager ¶
func (r *PodReconciler) SetupWithManager(mgr ctrl.Manager) error
SetupWithManager sets up the controller with the Manager.
type SchedulingConfigTemplateReconciler ¶
SchedulingConfigTemplateReconciler reconciles a SchedulingConfigTemplate object
func (*SchedulingConfigTemplateReconciler) Reconcile ¶
func (r *SchedulingConfigTemplateReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error)
When deleted, check whether any GPU pool is using this template; if so, add a warning event and requeue. When updated, trigger re-scheduling.
func (*SchedulingConfigTemplateReconciler) SetupWithManager ¶
func (r *SchedulingConfigTemplateReconciler) SetupWithManager(mgr ctrl.Manager) error
SetupWithManager sets up the controller with the Manager.
type TensorFusionClusterReconciler ¶
type TensorFusionClusterReconciler struct {
client.Client
Scheme *runtime.Scheme
Recorder record.EventRecorder
MetricsRecorder *metrics.MetricsRecorder
LastProcessedItems sync.Map
}
TensorFusionClusterReconciler reconciles a TensorFusionCluster object
func (*TensorFusionClusterReconciler) Reconcile ¶
func (r *TensorFusionClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error)
Reconcile a TensorFusionCluster object, create and monitor GPU Pool, managing cluster level component versions
func (*TensorFusionClusterReconciler) SetupWithManager ¶
func (r *TensorFusionClusterReconciler) SetupWithManager(mgr ctrl.Manager) error
SetupWithManager sets up the controller with the Manager.
type TensorFusionConnectionReconciler ¶
type TensorFusionConnectionReconciler struct {
client.Client
Scheme *runtime.Scheme
Recorder record.EventRecorder
}
TensorFusionConnectionReconciler reconciles a TensorFusionConnection object
func (*TensorFusionConnectionReconciler) Reconcile ¶
func (r *TensorFusionConnectionReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error)
Add and monitor GPU worker Pod for a TensorFusionConnection
func (*TensorFusionConnectionReconciler) SetupWithManager ¶
func (r *TensorFusionConnectionReconciler) SetupWithManager(mgr ctrl.Manager) error
SetupWithManager sets up the controller with the Manager.
type TensorFusionWorkloadReconciler ¶
type TensorFusionWorkloadReconciler struct {
client.Client
Scheme *runtime.Scheme
Recorder record.EventRecorder
PortAllocator *portallocator.PortAllocator
}
TensorFusionWorkloadReconciler reconciles a TensorFusionWorkload object
func (*TensorFusionWorkloadReconciler) Reconcile ¶
func (r *TensorFusionWorkloadReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error)
TensorFusionWorkload Reconciler
func (*TensorFusionWorkloadReconciler) SetupWithManager ¶
func (r *TensorFusionWorkloadReconciler) SetupWithManager(mgr ctrl.Manager) error
SetupWithManager sets up the controller with the Manager.
type WorkloadProfileReconciler ¶ added in v1.23.7
WorkloadProfileReconciler reconciles a WorkloadProfile object
func (*WorkloadProfileReconciler) Reconcile ¶ added in v1.23.7
func (r *WorkloadProfileReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error)
WorkloadProfile is a template to be referenced by TensorFusionWorkload; there is no reconcile logic.
func (*WorkloadProfileReconciler) SetupWithManager ¶ added in v1.23.7
func (r *WorkloadProfileReconciler) SetupWithManager(mgr ctrl.Manager) error
SetupWithManager sets up the controller with the Manager.
Source Files
¶
- fake_node_claim_contoller.go
- gpu_controller.go
- gpunode_controller.go
- gpunodeclass_controller.go
- gpupool_compaction_controller.go
- gpupool_controller.go
- gpupool_node_provision.go
- gpuresourcequota_controller.go
- node_controller.go
- pod_controller.go
- schedulingconfigtemplate_controller.go
- tensorfusioncluster_controller.go
- tensorfusionconnection_controller.go
- tensorfusionworkload_controller.go
- workloadprofile_controller.go