Documentation
¶
Index ¶
- Constants
- Variables
- func AddLabelsToObject(ctx context.Context, client crclient.Client, obj metav1.Object, ...) error
- func BuildContextWithTargetPCIAddress(ctx context.Context, pciAddressFromDPUObject string) context.Context
- func ContainsDPUPhase(phases map[string]struct{}, phase provisioningv1.DPUPhase) bool
- func ContainsDPUPhases(phases map[string]struct{}, subPhases map[string]struct{}) bool
- func CopyLabelsOrAnnotations(target, source map[string]string) map[string]string
- func CreateDPUAgentBootstrapKubeconfig(ctx context.Context, client crclient.Client, dpu *provisioningv1.DPU, ...) ([]byte, error)
- func CreateDPUAgentRole(ctx context.Context, client crclient.Client, scheme *runtime.Scheme, ...) error
- func CreateDPUAgentRoleBinding(ctx context.Context, client crclient.Client, scheme *runtime.Scheme, ...) error
- func DPUCondition(condType provisioningv1.DPUConditionType, reason, message string) *metav1.Condition
- func DeleteDPUAgentBootstrapTokens(ctx context.Context, client crclient.Client, dpuName, dpuNamespace string) error
- func DeleteObjects(ctx context.Context, client crclient.Client, objs ...crclient.Object) error
- func GenerateBFBCFGFilePath(filename string) string
- func GenerateBFBFilePath(filename string) string
- func GenerateBFBTMPFilePath(uid string) string
- func GenerateBFBTaskName(bfb provisioningv1.BFB) string
- func GenerateBFCFGFileName(dpuName string, uid string) string
- func GenerateDMSPodName(dpuNode crclient.Object) string
- func GenerateDMSServerCertName(dpuName string) string
- func GenerateDMSServerSecretName(dpuName string) string
- func GenerateDPUName(dpuNodeName string, dpuDeviceName string) string
- func GenerateDPUNodeMaintenanceObjectName(dpuNodeName string, nodeEffect *provisioningv1.NodeEffect) (string, error)
- func GenerateHostAgentPodName(dpuNode crclient.Object) string
- func GenerateLastAppliedAdditionalRequestorsOnDPUAnnotationKey(dpuName string) string
- func GenerateNodeName(dpu *provisioningv1.DPU) string
- func GetBFBRegistryAddressWithPort(ctx context.Context, c crclient.Client, namespace, hostOnlyBase string) (string, error)
- func GetClientset(ctx context.Context, client crclient.Client, dc *provisioningv1.DPUCluster) (*kubernetes.Clientset, []byte, error)
- func GetDPUCondition(status *provisioningv1.DPUStatus, conditionType string) (int, *metav1.Condition)
- func GetDPUDeviceCondition(dpuDevice *provisioningv1.DPUDevice, conditionType string) (int, *metav1.Condition)
- func GetDPUPhases(ctx context.Context, client crclient.Client, dpuNode *provisioningv1.DPUNode, ...) (err error)
- func GetDPUsWithPhase(ctx context.Context, client crclient.Client, dpuNode *provisioningv1.DPUNode, ...) ([]*provisioningv1.DPU, error)
- func GetNamespacedName(obj metav1.Object) types.NamespacedName
- func GetNodeFromDPUCluster(ctx context.Context, client crclient.Client, dpu *provisioningv1.DPU) (*corev1.Node, error)
- func GetObjects(ctx context.Context, client crclient.Client, objects []crclient.Object) (existObjects []crclient.Object, err error)
- func GetRemovedLabels(oldLabels, newLabels map[string]string) []string
- func IsDPUAfterProvisioningPhase(phase provisioningv1.DPUPhase) bool
- func IsDPUBeforeProvisioningPhase(phase provisioningv1.DPUPhase) bool
- func IsDPUInProvisioningPhase(phase provisioningv1.DPUPhase) bool
- func IsDPUNodeReady(dpuNode *provisioningv1.DPUNode) bool
- func IsNodeEffectApplied(dpunodemaintenance *provisioningv1.DPUNodeMaintenance) bool
- func IsNodeReady(node *corev1.Node) bool
- func KubeadmJoinSecretName(dpuName string) string
- func MarshalJSON(obj interface{}) (string, error)
- func NeedUpdateLabels(label1 map[string]string, label2 map[string]string) bool
- func NeedUpdateLabelsOnNodeInDPUCluster(dpuNode *corev1.Node, labelsOnDPUObject map[string]string) (bool, error)
- func NewCondition(condType string, err error, reason, message string) *metav1.Condition
- func RemoveDuplicates(arr []string) []string
- func ReplaceDaemonSetPodNodeNameNodeAffinity(affinity *corev1.Affinity, nodename string) *corev1.Affinity
- func SetDPUCondition(status *provisioningv1.DPUStatus, condition *metav1.Condition) bool
- func SetDPUDeviceCondition(dpuDevice *provisioningv1.DPUDevice, condition *metav1.Condition) bool
- func UpdateLabelsToNode(ctx context.Context, client crclient.Client, node *corev1.Node, ...) error
Constants ¶
const ( // DPUProvisioningPrefix is the prefix for all DPU provisioning labels and annotations. DPUProvisioningPrefix = "provisioning.dpu.nvidia.com/" // DPUNodeRebootMethodLabel is the label that specifies the reboot method. DPUNodeRebootMethodLabel = DPUProvisioningPrefix + "reboot-method" // DPUNodeAdditionalDPURebootLabel is the label that should be added to a DPUNode to trigger an additional DPU reboot // after the BFB is installed. DPUNodeAdditionalDPURebootLabel = DPUProvisioningPrefix + "dpu-reboot-after-install" // DPUNodeScriptNameLabel is the label that specifies the script name for the custom script reboot method. DPUNodeScriptNameLabel = DPUProvisioningPrefix + "script-name" // DPUSetNameLabel is the label that indicates the name of the DPUSet. DPUSetNameLabel = DPUProvisioningPrefix + "dpuset-name" // DPUSetNamespaceLabel is the label that indicates the namespace of the DPUSet. DPUSetNamespaceLabel = DPUProvisioningPrefix + "dpuset-namespace" // DPUDeviceNameLabel is the label that indicates the name of the DPUDevice the DPU is associated with. DPUDeviceNameLabel = DPUProvisioningPrefix + "dpudevice-name" // DPUDevicePCIAddressLabel is the label that indicates the PCI address of the DPU device. DPUDevicePCIAddressLabel = DPUProvisioningPrefix + "dpudevice-pciAddress" // DPUDevicePSIDLabel is the label that indicates the PSID of the DPU device. DPUDevicePSIDLabel = DPUProvisioningPrefix + "dpudevice-psid" // DPUDeviceOPNLabel is the label that indicates the OPN of the DPU device. DPUDeviceOPNLabel = DPUProvisioningPrefix + "dpudevice-opn" // DPUDeviceNumOfPFsLabel is the label that indicates the number of PFs on the DPU device. DPUDeviceNumOfPFsLabel = DPUProvisioningPrefix + "dpudevice-num-of-pfs" // DPUDevicePF0NameLabel is the label that indicates the name of the PF0 on the DPU device. DPUDevicePF0NameLabel = DPUProvisioningPrefix + "dpudevice-pf0-name" // DPUDeviceBMCIPLabel is the label that indicates the BMC IP of the DPU device. 
DPUDeviceBMCIPLabel = DPUProvisioningPrefix + "dpudevice-bmc-ip" // DPUOOBBridgeConfiguredLabel is the label that indicates that the DPU OOB bridge is configured. DPUOOBBridgeConfiguredLabel = "dpu-oob-bridge-configured" // NodeFeatureDiscoveryLabelPrefix is the prefix for all NodeFeatureDiscovery labels. NodeFeatureDiscoveryLabelPrefix = "feature.node.kubernetes.io/" // NodeSelectorLabel is a label for linking Node with DPU. NodeSelectorLabel = NodeFeatureDiscoveryLabelPrefix + "dpu-enabled" // DPUSetDPUTemplateSpecHashLabelKey is the label for the hash of the DPU template spec from the DPUSet. DPUSetDPUTemplateSpecHashLabelKey = DPUProvisioningPrefix + "dpuset-dpu-template-spec-hash" // LastAppliedLabelsOnDPUKey is the key for the last applied labels. LastAppliedLabelsOnDPUKey = DPUProvisioningPrefix + "last-applied-labels-on-dpu" // ProvisioningComponentLabelKey is the label for the component of the DPU. ProvisioningComponentLabelKey = DPUProvisioningPrefix + "component" // HostNameDPULabelKey is the label added to the DPU Kubernetes Node that indicates the hostname // of the host that this DPU belongs to. // Deprecated: This field is deprecated and will be removed with v26.7.0. Use provisioningv1.DPUNodeNameLabel and // provisioningv1.DPUNodeNamespaceLabel instead. HostNameDPULabelKey = DPUProvisioningPrefix + "host" // SkipDpuProvisioningLabel is the label used to skip DPU provisioning SkipDpuProvisioningLabel = DPUProvisioningPrefix + "skip-dpu-provisioning" // OverrideDMSPodNameAnnotationKey is the key for the override DMS pod name annotation. OverrideDMSPodNameAnnotationKey = DPUProvisioningPrefix + "override-dms-pod-name" // LastAppliedAdditionalRequestorsOnDPUPrefix is the prefix of the annotation key for the last applied node maintenance additional requestors per DPU. LastAppliedAdditionalRequestorsOnDPUPrefix = DPUProvisioningPrefix + "last-applied-additional-requestors-on-" // HoldNodeEffectKey is the key for the hold node effect annotation. 
HoldNodeEffectKey = DPUProvisioningPrefix + "wait-for-external-nodeeffect" // TrustedSFCount is the key for the trusted SF count annotation. TrustedSFCount = DPUProvisioningPrefix + "num-of-trusted-sfs" // SkipBFCFGSizeCheck is the annotation key to skip the bf.cfg size check. SkipBFCFGSizeCheck = DPUProvisioningPrefix + "skip-bfcfg-size-check" // TolerationNotReadyKey is the key for the NotReady taint. TolerationNotReadyKey = "node.kubernetes.io/not-ready" // TolerationUnreachableKey is the key for the Unreachable taint. TolerationUnreachableKey = "node.kubernetes.io/unreachable" // TolerationUnschedulableKey is the key for the Unschedulable taint. TolerationUnschedulableKey = "node.kubernetes.io/unschedulable" TolerationOVNKubernetesNetworkUnavailableKey = "k8s.ovn.org/network-unavailable" // RequeueInterval is the interval to requeue the request. RequeueInterval = 5 * time.Second // RebootSyncInterval is the interval to requeue the request while waiting for all DPUs to get into a non-provisioning phase. RebootSyncInterval = 30 * time.Second // CFGExtension is the extension of the BFB configuration file. CFGExtension = "cfg" // NodeMaintenanceRequestorID is the requestor ID used for NodeMaintenance CRs NodeMaintenanceRequestorID = "dpu.nvidia.com" // ProvisioningGroupName is the provisioning group, used to identify provisioning as // additional Requestors in NodeMaintenance CR. ProvisioningGroupName = "provisioning.dpu.nvidia.com" // PCIAddressTargetKey is the key for the PCI address in the metadata context. PCIAddressTargetKey = "target" // KubernetesVersion is the version used by the DPUCluster. // It has a one-to-one relationship with the DPF Operator version and needs to be updated with each minor release. KubernetesVersion = "v1.34.0" // MaxNameLength is the maximum length of the name of the K8s resource. MaxNameLength = validation.DNS1123SubdomainMaxLength // 253 // HostPowerCycleRequireKey is the key for the host power cycle required annotation. 
HostPowerCycleRequireKey = DPUProvisioningPrefix + "host-power-cycle-required" // AgentCondRebootMethodDiscovery is set True when the device-query reboot path is active. // Reason is the resolved RebootMethodType (e.g. SystemLevelReset, NoAction); Message holds mlxfwreset JSON when reset is needed. AgentCondRebootMethodDiscovery = "RebootMethodDiscovery" )
const ( ServiceAccountCAPath = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" LabelDPUName = "provisioning.dpu.nvidia.com/dpu-name" LabelDPUNamespace = "provisioning.dpu.nvidia.com/dpu-namespace" // DPUAgentBootstrapGroup is the extra group assigned to bootstrap tokens // for DPU agents. It must match the subject group in the ClusterRoleBinding // deployed by the operator (config/provisioning/rbac/dpuagent_bootstrap.yaml). DPUAgentBootstrapGroup = "system:bootstrappers:dpf:dpu-agent" )
Variables ¶
var ( // Location of BFB binary files BFBBaseDir = "bfb" KubeconfigBaseDir = "kubeconfig" )
Functions ¶
func AddLabelsToObject ¶
func AddLabelsToObject(ctx context.Context, client crclient.Client, obj metav1.Object, labels map[string]string) error
AddLabelsToObject adds the given labels to any Kubernetes object implementing metav1.Object
func ContainsDPUPhase ¶
func ContainsDPUPhase(phases map[string]struct{}, phase provisioningv1.DPUPhase) bool
func ContainsDPUPhases ¶
func CopyLabelsOrAnnotations ¶
CopyLabelsOrAnnotations merges source labels/annotations into target. If target is nil, it will be initialized.
func CreateDPUAgentBootstrapKubeconfig ¶
func CreateDPUAgentBootstrapKubeconfig(ctx context.Context, client crclient.Client, dpu *provisioningv1.DPU, apiServerAddress, caPath string) ([]byte, error)
CreateDPUAgentBootstrapKubeconfig creates a short-lived bootstrap token secret in kube-system for the DPU agent and returns a kubeconfig that authenticates with that token. If a valid (non-expired) token already exists for this DPU (identified by labels), it reuses the existing token. caPath is the path to the CA certificate file (typically ServiceAccountCAPath).
func CreateDPUAgentRole ¶
func CreateDPUAgentRole(ctx context.Context, client crclient.Client, scheme *runtime.Scheme, dpu *provisioningv1.DPU) error
CreateDPUAgentRole creates a per-DPU Role that restricts the DPU agent to only its own DPU CR and kubeadm join Secret.
func CreateDPUAgentRoleBinding ¶
func CreateDPUAgentRoleBinding(ctx context.Context, client crclient.Client, scheme *runtime.Scheme, dpu *provisioningv1.DPU) error
CreateDPUAgentRoleBinding creates a per-DPU RoleBinding that binds the certificate username (da-{dpu.name}) to the per-DPU Role.
func DPUCondition ¶
func DPUCondition(condType provisioningv1.DPUConditionType, reason, message string) *metav1.Condition
func DeleteDPUAgentBootstrapTokens ¶
func DeleteDPUAgentBootstrapTokens(ctx context.Context, client crclient.Client, dpuName, dpuNamespace string) error
DeleteDPUAgentBootstrapTokens deletes all bootstrap token secrets in kube-system that belong to the specified DPU (identified by labels).
func DeleteObjects ¶
func GenerateBFBCFGFilePath ¶
func GenerateBFBFilePath ¶
func GenerateBFBTMPFilePath ¶
func GenerateBFBTaskName ¶
func GenerateBFBTaskName(bfb provisioningv1.BFB) string
func GenerateBFCFGFileName ¶
func GenerateDMSPodName ¶
GenerateDMSPodName creates a name for the DMS pod based on the name of the passed object. This allows the passed object to be either a DPUNode or a corev1.Node.
func GenerateDPUName ¶
func GenerateDPUNodeMaintenanceObjectName ¶
func GenerateDPUNodeMaintenanceObjectName(dpuNodeName string, nodeEffect *provisioningv1.NodeEffect) (string, error)
func GenerateNodeName ¶
func GenerateNodeName(dpu *provisioningv1.DPU) string
func GetBFBRegistryAddressWithPort ¶
func GetBFBRegistryAddressWithPort(ctx context.Context, c crclient.Client, namespace, hostOnlyBase string) (string, error)
GetBFBRegistryAddressWithPort returns the full bfb-registry address with port
func GetClientset ¶
func GetClientset(ctx context.Context, client crclient.Client, dc *provisioningv1.DPUCluster) (*kubernetes.Clientset, []byte, error)
func GetDPUCondition ¶
func GetDPUDeviceCondition ¶
func GetDPUPhases ¶
func GetDPUsWithPhase ¶
func GetDPUsWithPhase(ctx context.Context, client crclient.Client, dpuNode *provisioningv1.DPUNode, phase provisioningv1.DPUPhase) ([]*provisioningv1.DPU, error)
func GetNamespacedName ¶
func GetNamespacedName(obj metav1.Object) types.NamespacedName
func GetNodeFromDPUCluster ¶
func GetObjects ¶
func GetRemovedLabels ¶
GetRemovedLabels returns the labels that are present in oldLabels but not in newLabels
func IsDPUAfterProvisioningPhase ¶
func IsDPUAfterProvisioningPhase(phase provisioningv1.DPUPhase) bool
func IsDPUBeforeProvisioningPhase ¶
func IsDPUBeforeProvisioningPhase(phase provisioningv1.DPUPhase) bool
func IsDPUInProvisioningPhase ¶
func IsDPUInProvisioningPhase(phase provisioningv1.DPUPhase) bool
IsDPUInProvisioningPhase returns true if the phase is between Pending (exclusive) and OSInstalling
func IsDPUNodeReady ¶
func IsDPUNodeReady(dpuNode *provisioningv1.DPUNode) bool
func IsNodeEffectApplied ¶
func IsNodeEffectApplied(dpunodemaintenance *provisioningv1.DPUNodeMaintenance) bool
func IsNodeReady ¶
func KubeadmJoinSecretName ¶
func MarshalJSON ¶
func NeedUpdateLabels ¶
NeedUpdateLabels compares two sets of labels. It returns true if label2 does not contain all the key-value pairs of label1; otherwise it returns false.
func NewCondition ¶
NewCondition creates a new metav1.Condition with the given parameters. TODO: merge with DPUCondition().
func RemoveDuplicates ¶
RemoveDuplicates removes duplicate entries from a slice of strings.
func ReplaceDaemonSetPodNodeNameNodeAffinity ¶
func ReplaceDaemonSetPodNodeNameNodeAffinity(affinity *corev1.Affinity, nodename string) *corev1.Affinity
ReplaceDaemonSetPodNodeNameNodeAffinity replaces the RequiredDuringSchedulingIgnoredDuringExecution NodeAffinity of the given affinity with a new NodeAffinity that selects the given nodeName. Note that this function assumes that no NodeAffinity conflicts with the selected nodeName.
This method is copied from https://github.com/kubernetes/kubernetes/blob/dbc2b0a5c7acc349ea71a14e49913661eaf708d2/pkg/controller/daemon/util/daemonset_util.go#L176
func SetDPUCondition ¶
func SetDPUCondition(status *provisioningv1.DPUStatus, condition *metav1.Condition) bool
func SetDPUDeviceCondition ¶
func SetDPUDeviceCondition(dpuDevice *provisioningv1.DPUDevice, condition *metav1.Condition) bool
func UpdateLabelsToNode ¶
func UpdateLabelsToNode(ctx context.Context, client crclient.Client, node *corev1.Node, labels map[string]string) error
UpdateLabelsToNode updates the labels of the given node. It will add the new labels and remove the labels that are not in the new labels. It will also update the last applied labels annotation.
Types ¶
This section is empty.