utils

package
v1.4.1 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 19, 2025 | License: Apache-2.0 | Imports: 37 | Imported by: 0

Documentation

Index

Constants

View Source
const ClusterTypeK8s = "kubernetes"
View Source
const ClusterTypeOpenShift = "openshift"
View Source
const HttpServerPort = "8084"

Variables

This section is empty.

Functions

func AddNodeLabel added in v1.2.2

func AddNodeLabel(cl *kubernetes.Clientset, nodeName string, key string, value string) error

func CheckDeploymentWithStandardKMMNFD

func CheckDeploymentWithStandardKMMNFD(cl *kubernetes.Clientset, create bool) error

func CheckGpuLabel

func CheckGpuLabel(rl v1.ResourceList, label string) bool

func CheckHelmDeployment

func CheckHelmDeployment(cl *kubernetes.Clientset, ns string, create bool) error

func CheckHelmOCDeployment

func CheckHelmOCDeployment(cl *kubernetes.Clientset, create bool) error

func CheckOCDeploymentWithStandardKMMNFD

func CheckOCDeploymentWithStandardKMMNFD(cl *kubernetes.Clientset, create bool) error

func CreateConfigMap added in v1.4.0

func CreateConfigMap(ctx context.Context, cl *kubernetes.Clientset, ns string, cmName string, data map[string]string) error

func CreateDaemonset

func CreateDaemonset(cl *kubernetes.Clientset, ns string, name string, image string, matchLabels map[string]string, res *v1.ResourceRequirements) error

func CreateDaemonsetVerify

func CreateDaemonsetVerify(ctx context.Context, cl *kubernetes.Clientset, ns string,
	name string, image string, matchLabels map[string]string,
	res *v1.ResourceRequirements) error

func CreateMinioService added in v1.2.2

func CreateMinioService(ctx context.Context, cl *kubernetes.Clientset, ns, hostName string) error

func CreateOpaqueSecret added in v1.2.2

func CreateOpaqueSecret(ctx context.Context, cl *kubernetes.Clientset, name, ns string, keys map[string]string) error

func CreatePod

func CreatePod(ctx context.Context, cl *kubernetes.Clientset, ns string,
	name string, image string, workerNodeName string) error

func CreateTLSSecret

func CreateTLSSecret(ctx context.Context, cl *kubernetes.Clientset, name, ns string, crt, key []byte) error

func CreateTempFile

func CreateTempFile(fileName string, data []byte) (*os.File, error)

func CurlMetrics

func CurlMetrics(
	endpointIPs []string,
	token string,
	port int,
	secure bool,
	caCertPath string,
	clientCertPath string,
	clientKeyPath string,
) error

func DelDaemonset

func DelDaemonset(cl *kubernetes.Clientset, ns string, name string) error

func DelRocmPods

func DelRocmPods(ctx context.Context, cl *kubernetes.Clientset) error

func DelRocmPodsByNodeNames

func DelRocmPodsByNodeNames(ctx context.Context, cl *kubernetes.Clientset,
	workerNodeNames []string) error

func DeleteConfigMap added in v1.4.0

func DeleteConfigMap(ctx context.Context, cl *kubernetes.Clientset, ns string, cmName string) error

func DeleteMinioService added in v1.2.2

func DeleteMinioService(ctx context.Context, cl *kubernetes.Clientset, ns string)

func DeleteNodeAppDaemonSet

func DeleteNodeAppDaemonSet(cl *kubernetes.Clientset) error

func DeleteNodeLabel added in v1.2.2

func DeleteNodeLabel(cl *kubernetes.Clientset, nodeName string, key string) error

func DeleteOpaqueSecret added in v1.2.2

func DeleteOpaqueSecret(ctx context.Context, cl *kubernetes.Clientset, name, ns string)

func DeletePod

func DeletePod(ctx context.Context, cl *kubernetes.Clientset, ns string,
	name string) error

func DeleteRebootPod

func DeleteRebootPod(ctx context.Context, cl *kubernetes.Clientset, nodeName string, force bool)

func DeleteTLSSecret

func DeleteTLSSecret(ctx context.Context, cl *kubernetes.Clientset, name, ns string) error

func DeleteTempFile

func DeleteTempFile(file *os.File) error

func DeployNodeAppDaemonSet

func DeployNodeAppDaemonSet(cl *kubernetes.Clientset) error

func DeployResourcesFromFile

func DeployResourcesFromFile(pathOrURL string, cl *kubernetes.Clientset, apiCl *apiextClient.Clientset, create bool) error

func DeployRocmPods

func DeployRocmPods(ctx context.Context, cl *kubernetes.Clientset,
	res *v1.ResourceRequirements) error

func DeployRocmPodsByNodeNames

func DeployRocmPodsByNodeNames(ctx context.Context, cl *kubernetes.Clientset,
	workerNodeNames []string) error

func DeployRocmPytorchPods added in v1.4.0

func DeployRocmPytorchPods(ctx context.Context, cl *kubernetes.Clientset,
	res *v1.ResourceRequirements) error

func DevicePluginName

func DevicePluginName(cfgName string) string

func ExecPodCmd

func ExecPodCmd(command string, ns string, name string, container string) (string, error)

func GenerateServiceAccountToken

func GenerateServiceAccountToken(clientset *kubernetes.Clientset, serviceAccountName, namespace string) (string, error)

func GetAMDGPUCount

func GetAMDGPUCount(ctx context.Context, cl *kubernetes.Clientset, resourceType string) (map[string]int, error)

func GetAMDGpuWorker

func GetAMDGpuWorker(cl *kubernetes.Clientset, isOpenshift bool) []v1.Node

func GetClusterIP

func GetClusterIP(clientset *kubernetes.Clientset, serviceName, namespace string) (string, error)

func GetClusterType

func GetClusterType(cfg *rest.Config) string

func GetGpuDriverVersion

func GetGpuDriverVersion(name string) (string, error)

func GetJobLogs

func GetJobLogs(clientset *kubernetes.Clientset, job *batchv1.Job) ([]string, error)

func GetNodeIP

func GetNodeIP(ctx context.Context, cl *kubernetes.Clientset,
	nodeName string) (string, error)

func GetNodeIPs

func GetNodeIPs(clientset *kubernetes.Clientset) ([]string, error)

func GetNodeIPsForDaemonSet

func GetNodeIPsForDaemonSet(clientset *kubernetes.Clientset, daemonSetName, namespace string) ([]string, error)

func GetNonAMDGpuWorker

func GetNonAMDGpuWorker(cl *kubernetes.Clientset) []v1.Node

func GetPodNamesFromJob

func GetPodNamesFromJob(clientset *kubernetes.Clientset, job *batchv1.Job) ([]string, error)

func GetRebootPod

func GetRebootPod(nodeName string) *v1.Pod

func GetRocmInfo

func GetRocmInfo(name string) (string, error)

func GetServiceEndpoints

func GetServiceEndpoints(clientset *kubernetes.Clientset, serviceName, namespace string) ([]string, error)

func GetWorkerNodes

func GetWorkerNodes(cl *kubernetes.Clientset) []*v1.Node

func HandleNodesReboot

func HandleNodesReboot(ctx context.Context, cl *kubernetes.Clientset, nodes []v1.Node) error

func IsJSONParsable

func IsJSONParsable(s string) bool

func ListGpuDrivers

func ListGpuDrivers(name string) (string, error)

func ListRocmPods

func ListRocmPods(ctx context.Context, cl *kubernetes.Clientset) ([]string, error)

func ListRocmPodsByNodeNames

func ListRocmPodsByNodeNames(ctx context.Context,
	workerNodeNames []string) []string

func NFDWorkerName

func NFDWorkerName(isOpenshift bool) string

func NodeLabellerName

func NodeLabellerName(cfgName string) string

func NodeTaint added in v1.2.2

func NodeTaint(cl *kubernetes.Clientset, nodeName string) error

func PatchKMMDeploymentWithCIENVFlag

func PatchKMMDeploymentWithCIENVFlag(cl *kubernetes.Clientset) error

func PatchOperatorControllerDeploymentWithCIENVFlag

func PatchOperatorControllerDeploymentWithCIENVFlag(cl *kubernetes.Clientset) error

func RemoveMinioServiceAccount added in v1.4.0

func RemoveMinioServiceAccount(ns, pod, container, accessKey string)

func Retry

func Retry(f func() error, timeout time.Duration, period time.Duration) error

func RunCommand

func RunCommand(command string)

func RunCommandOnNode

func RunCommandOnNode(ctx context.Context, cl *kubernetes.Clientset, nodeName, command string) (string, error)

func SetGPUHealthOnNode

func SetGPUHealthOnNode(cl *kubernetes.Clientset, ns, gpuid, health, nodeName string) error

func SetupAccessKeysOnMinioServer added in v1.2.2

func SetupAccessKeysOnMinioServer(ns, pod, container, accessKey, secretKey string)

func SplitYAML

func SplitYAML(data []byte) [][]byte

func VerifyROCMPODResourceCount

func VerifyROCMPODResourceCount(ctx context.Context, cl *kubernetes.Clientset,
	gpuReqCount int, resourceType string) error

Types

type UserRequest

type UserRequest struct {
	Command string `json:"command"`
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL