 Documentation
      ¶
      Documentation
      ¶
    
    
  
    
  
    Index ¶
- Variables
- func Node(options NodeOptions) *corev1.Node
- func SetTestModeCompactionPeriod()
- func UpdateK8SNodeSelectorHash(ctx context.Context, k8sClient client.Client, node *corev1.Node, hash string) error
- type FakeNodeClaimReconciler
- type GPUNodeClaimReconciler
- type GPUNodeClassReconciler
- type GPUNodeReconciler
- type GPUPoolCompactionReconciler
- type GPUPoolReconciler
- type GPUReconciler
- type GPUResourceQuotaReconciler
- type NodeOptions
- type NodeReconciler
- type PodReconciler
- type SchedulingConfigTemplateReconciler
- type TensorFusionClusterReconciler
- type TensorFusionConnectionReconciler
- type TensorFusionWorkloadReconciler
- type WorkloadProfileReconciler
Constants ¶
This section is empty.
Variables ¶
var (
	// Kill switch to avoid creating too many cloud vendor nodes.
	// Controlled by /api/provision?enable=true/false
	ProvisioningToggle = true

	// Nodes being created; the next round of capacity checks should consider the assumed resources.
	// Map key is pool name, second level is GPUClaim name.
	PendingGPUNodeClaim map[string]map[string]tfv1.Resource

	// Nodes being deleted; deletion must be serialized, one round at a time.
	// Map key is pool name, value is GPUNode name list.
	PendingDeletionGPUNodes map[string][]string
)
Functions ¶
func SetTestModeCompactionPeriod ¶ added in v1.41.0
func SetTestModeCompactionPeriod()
Types ¶
type FakeNodeClaimReconciler ¶ added in v1.40.0
type FakeNodeClaimReconciler struct {
	Scheme *runtime.Scheme
	// contains filtered or unexported fields
}
    func (*FakeNodeClaimReconciler) SetupWithManager ¶ added in v1.40.0
func (r *FakeNodeClaimReconciler) SetupWithManager(mgr ctrl.Manager) error
type GPUNodeClaimReconciler ¶ added in v1.41.0
type GPUNodeClaimReconciler struct {
	client.Client
	Scheme   *runtime.Scheme
	Recorder record.EventRecorder
}
    GPUNodeClaimReconciler reconciles a GPUNodeClaim object
func (*GPUNodeClaimReconciler) Reconcile ¶ added in v1.41.0
func (r *GPUNodeClaimReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error)
GPUNodeClaim is responsible for creating cloud vendor GPU nodes
func (*GPUNodeClaimReconciler) SetupWithManager ¶ added in v1.41.0
func (r *GPUNodeClaimReconciler) SetupWithManager(mgr ctrl.Manager) error
SetupWithManager sets up the controller with the Manager.
type GPUNodeClassReconciler ¶
GPUNodeClassReconciler reconciles a GPUNodeClass object
func (*GPUNodeClassReconciler) Reconcile ¶
func (r *GPUNodeClassReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error)
Reconcile GPU node classes
func (*GPUNodeClassReconciler) SetupWithManager ¶
func (r *GPUNodeClassReconciler) SetupWithManager(mgr ctrl.Manager) error
SetupWithManager sets up the controller with the Manager.
type GPUNodeReconciler ¶
type GPUNodeReconciler struct {
	client.Client
	Scheme   *runtime.Scheme
	Recorder record.EventRecorder
}
    GPUNodeReconciler reconciles a GPUNode object
func (*GPUNodeReconciler) SetupWithManager ¶
func (r *GPUNodeReconciler) SetupWithManager(mgr ctrl.Manager) error
SetupWithManager sets up the controller with the Manager.
type GPUPoolCompactionReconciler ¶
type GPUPoolCompactionReconciler struct {
	client.Client
	Scheme   *runtime.Scheme
	Recorder record.EventRecorder
	Allocator *gpuallocator.GpuAllocator
	// contains filtered or unexported fields
}
    GPUPoolCompactionReconciler reconciles a GPUPool object (compaction)
func (*GPUPoolCompactionReconciler) SetupWithManager ¶
func (r *GPUPoolCompactionReconciler) SetupWithManager(mgr ctrl.Manager) error
SetupWithManager sets up the controller with the Manager.
type GPUPoolReconciler ¶
type GPUPoolReconciler struct {
	client.Client
	LastProcessedItems sync.Map
	Scheme   *runtime.Scheme
	Recorder record.EventRecorder
}
    GPUPoolReconciler reconciles a GPUPool object
func (*GPUPoolReconciler) SetupWithManager ¶
func (r *GPUPoolReconciler) SetupWithManager(mgr ctrl.Manager, addLimiter bool) error
SetupWithManager sets up the controller with the Manager.
type GPUReconciler ¶
GPUReconciler reconciles a GPU object
func (*GPUReconciler) Reconcile ¶
Reconcile is part of the main kubernetes reconciliation loop which aims to move the current state of the cluster closer to the desired state.
func (*GPUReconciler) SetupWithManager ¶
SetupWithManager sets up the controller with the Manager.
type GPUResourceQuotaReconciler ¶ added in v1.34.6
type GPUResourceQuotaReconciler struct {
	client.Client
	Scheme     *runtime.Scheme
	Recorder   record.EventRecorder
	QuotaStore *quota.QuotaStore
}
    GPUResourceQuotaReconciler reconciles a GPUResourceQuota object
func (*GPUResourceQuotaReconciler) Reconcile ¶ added in v1.34.6
func (r *GPUResourceQuotaReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error)
Reconcile is part of the main kubernetes reconciliation loop which aims to move the current state of the cluster closer to the desired state.
func (*GPUResourceQuotaReconciler) SetupWithManager ¶ added in v1.34.6
func (r *GPUResourceQuotaReconciler) SetupWithManager(mgr ctrl.Manager) error
SetupWithManager sets up the controller with the Manager.
type NodeOptions ¶ added in v1.40.0
type NodeOptions struct {
	metav1.ObjectMeta
	ReadyStatus    corev1.ConditionStatus
	ReadyReason    string
	Conditions     []corev1.NodeCondition
	Unschedulable  bool
	ProviderID     string
	Taints         []corev1.Taint
	Allocatable    corev1.ResourceList
	Capacity       corev1.ResourceList
	OwnerReference []metav1.OwnerReference
}
    type NodeReconciler ¶
NodeReconciler reconciles a Node object
func (*NodeReconciler) SetupWithManager ¶
func (r *NodeReconciler) SetupWithManager(mgr ctrl.Manager) error
SetupWithManager sets up the controller with the Manager.
type PodReconciler ¶
type PodReconciler struct {
	client.Client
	Scheme        *runtime.Scheme
	Allocator     *gpuallocator.GpuAllocator
	PortAllocator *portallocator.PortAllocator
}
    PodReconciler reconciles a Pod object
func (*PodReconciler) Reconcile ¶
Add a GPU connection for Pods using GPU. The TensorFusion connection has to be created here because the pod UID is not available in the MutatingWebhook.
func (*PodReconciler) SetupWithManager ¶
func (r *PodReconciler) SetupWithManager(mgr ctrl.Manager) error
SetupWithManager sets up the controller with the Manager.
type SchedulingConfigTemplateReconciler ¶
SchedulingConfigTemplateReconciler reconciles a SchedulingConfigTemplate object
func (*SchedulingConfigTemplateReconciler) Reconcile ¶
func (r *SchedulingConfigTemplateReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error)
When deleted, check whether any GPU pool is using this template; if so, add a warning event and requeue. When updated, trigger re-scheduling.
func (*SchedulingConfigTemplateReconciler) SetupWithManager ¶
func (r *SchedulingConfigTemplateReconciler) SetupWithManager(mgr ctrl.Manager) error
SetupWithManager sets up the controller with the Manager.
type TensorFusionClusterReconciler ¶
type TensorFusionClusterReconciler struct {
	client.Client
	Scheme          *runtime.Scheme
	Recorder        record.EventRecorder
	MetricsRecorder *metrics.MetricsRecorder
	LastProcessedItems sync.Map
}
    TensorFusionClusterReconciler reconciles a TensorFusionCluster object
func (*TensorFusionClusterReconciler) Reconcile ¶
func (r *TensorFusionClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error)
Reconcile a TensorFusionCluster object, create and monitor GPU Pool, managing cluster level component versions
func (*TensorFusionClusterReconciler) SetupWithManager ¶
func (r *TensorFusionClusterReconciler) SetupWithManager(mgr ctrl.Manager, addLimiter bool) error
SetupWithManager sets up the controller with the Manager.
type TensorFusionConnectionReconciler ¶
type TensorFusionConnectionReconciler struct {
	client.Client
	Scheme   *runtime.Scheme
	Recorder record.EventRecorder
}
    TensorFusionConnectionReconciler reconciles a TensorFusionConnection object
func (*TensorFusionConnectionReconciler) Reconcile ¶
func (r *TensorFusionConnectionReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error)
Add and monitor GPU worker Pod for a TensorFusionConnection
func (*TensorFusionConnectionReconciler) SetupWithManager ¶
func (r *TensorFusionConnectionReconciler) SetupWithManager(mgr ctrl.Manager) error
SetupWithManager sets up the controller with the Manager.
type TensorFusionWorkloadReconciler ¶
type TensorFusionWorkloadReconciler struct {
	client.Client
	Scheme        *runtime.Scheme
	Recorder      record.EventRecorder
	PortAllocator *portallocator.PortAllocator
}
    TensorFusionWorkloadReconciler reconciles a TensorFusionWorkload object
func (*TensorFusionWorkloadReconciler) Reconcile ¶
func (r *TensorFusionWorkloadReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error)
TensorFusionWorkload Reconciler
func (*TensorFusionWorkloadReconciler) SetupWithManager ¶
func (r *TensorFusionWorkloadReconciler) SetupWithManager(mgr ctrl.Manager) error
SetupWithManager sets up the controller with the Manager.
type WorkloadProfileReconciler ¶ added in v1.23.7
WorkloadProfileReconciler reconciles a WorkloadProfile object
func (*WorkloadProfileReconciler) Reconcile ¶ added in v1.23.7
func (r *WorkloadProfileReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error)
WorkloadProfile is a template to be referred by TensorFusionWorkload, no logic for reconcile
func (*WorkloadProfileReconciler) SetupWithManager ¶ added in v1.23.7
func (r *WorkloadProfileReconciler) SetupWithManager(mgr ctrl.Manager) error
SetupWithManager sets up the controller with the Manager.
       Source Files
      ¶
      Source Files
      ¶
    
- fake_node_claim_contoller.go
- gpu_controller.go
- gpunode_controller.go
- gpunodeclaim_controller.go
- gpunodeclass_controller.go
- gpupool_compaction_controller.go
- gpupool_controller.go
- gpupool_node_provision.go
- gpuresourcequota_controller.go
- node_controller.go
- pod_controller.go
- schedulingconfigtemplate_controller.go
- tensorfusioncluster_controller.go
- tensorfusionconnection_controller.go
- tensorfusionworkload_controller.go
- workloadprofile_controller.go