utils

package
v1.2.2 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: May 21, 2025 | License: Apache-2.0 | Imports: 36 | Imported by: 0

Documentation

Index

Constants

View Source
const ClusterTypeK8s = "kubernetes"
View Source
const ClusterTypeOpenShift = "openshift"
View Source
const HttpServerPort = "8084"

Variables

This section is empty.

Functions

func AddNodeLabel added in v1.2.2

func AddNodeLabel(cl *kubernetes.Clientset, nodeName string, key string, value string) error

func CheckDeploymentWithStandardKMMNFD

func CheckDeploymentWithStandardKMMNFD(cl *kubernetes.Clientset, create bool) error

func CheckGpuLabel

func CheckGpuLabel(rl v1.ResourceList) bool

func CheckHelmDeployment

func CheckHelmDeployment(cl *kubernetes.Clientset, ns string, create bool) error

func CheckHelmOCDeployment

func CheckHelmOCDeployment(cl *kubernetes.Clientset, create bool) error

func CheckOCDeploymentWithStandardKMMNFD

func CheckOCDeploymentWithStandardKMMNFD(cl *kubernetes.Clientset, create bool) error

func CreateDaemonset

func CreateDaemonset(cl *kubernetes.Clientset, ns string, name string, image string, matchLabels map[string]string, res *v1.ResourceRequirements) error

func CreateDaemonsetVerify

func CreateDaemonsetVerify(ctx context.Context, cl *kubernetes.Clientset, ns string,
	name string, image string, matchLabels map[string]string,
	res *v1.ResourceRequirements) error

func CreateMinioService added in v1.2.2

func CreateMinioService(ctx context.Context, cl *kubernetes.Clientset, ns, hostName string) error

func CreateOpaqueSecret added in v1.2.2

func CreateOpaqueSecret(ctx context.Context, cl *kubernetes.Clientset, name, ns string, keys map[string]string) error

func CreatePod

func CreatePod(ctx context.Context, cl *kubernetes.Clientset, ns string,
	name string, image string, workerNodeName string) error

func CreateTLSSecret

func CreateTLSSecret(ctx context.Context, cl *kubernetes.Clientset, name, ns string, crt, key []byte) error

func CreateTempFile

func CreateTempFile(fileName string, data []byte) (*os.File, error)

func CurlMetrics

func CurlMetrics(
	endpointIPs []string,
	token string,
	port int,
	secure bool,
	caCertPath string,
	clientCertPath string,
	clientKeyPath string,
) error

func DelDaemonset

func DelDaemonset(cl *kubernetes.Clientset, ns string, name string) error

func DelRocmPods

func DelRocmPods(ctx context.Context, cl *kubernetes.Clientset) error

func DelRocmPodsByNodeNames

func DelRocmPodsByNodeNames(ctx context.Context, cl *kubernetes.Clientset,
	workerNodeNames []string) error

func DeleteMinioService added in v1.2.2

func DeleteMinioService(ctx context.Context, cl *kubernetes.Clientset, ns string)

func DeleteNodeAppDaemonSet

func DeleteNodeAppDaemonSet(cl *kubernetes.Clientset) error

func DeleteNodeLabel added in v1.2.2

func DeleteNodeLabel(cl *kubernetes.Clientset, nodeName string, key string) error

func DeleteOpaqueSecret added in v1.2.2

func DeleteOpaqueSecret(ctx context.Context, cl *kubernetes.Clientset, name, ns string)

func DeletePod

func DeletePod(ctx context.Context, cl *kubernetes.Clientset, ns string,
	name string) error

func DeleteRebootPod

func DeleteRebootPod(ctx context.Context, cl *kubernetes.Clientset, nodeName string, force bool)

func DeleteTLSSecret

func DeleteTLSSecret(ctx context.Context, cl *kubernetes.Clientset, name, ns string) error

func DeleteTempFile

func DeleteTempFile(file *os.File) error

func DeployNodeAppDaemonSet

func DeployNodeAppDaemonSet(cl *kubernetes.Clientset) error

func DeployResourcesFromFile

func DeployResourcesFromFile(pathOrURL string, cl *kubernetes.Clientset, apiCl *apiextClient.Clientset, create bool) error

func DeployRocmPods

func DeployRocmPods(ctx context.Context, cl *kubernetes.Clientset,
	res *v1.ResourceRequirements) error

func DeployRocmPodsByNodeNames

func DeployRocmPodsByNodeNames(ctx context.Context, cl *kubernetes.Clientset,
	workerNodeNames []string) error

func DevicePluginName

func DevicePluginName(cfgName string) string

func ExecPodCmd

func ExecPodCmd(command string, ns string, name string, container string) (string, error)

func GenerateServiceAccountToken

func GenerateServiceAccountToken(clientset *kubernetes.Clientset, serviceAccountName, namespace string) (string, error)

func GetAMDGPUCount

func GetAMDGPUCount(ctx context.Context, cl *kubernetes.Clientset) (map[string]int, error)

func GetAMDGpuWorker

func GetAMDGpuWorker(cl *kubernetes.Clientset, isOpenshift bool) []v1.Node

func GetClusterIP

func GetClusterIP(clientset *kubernetes.Clientset, serviceName, namespace string) (string, error)

func GetClusterType

func GetClusterType(cfg *rest.Config) string

func GetGpuDriverVersion

func GetGpuDriverVersion(name string) (string, error)

func GetJobLogs

func GetJobLogs(clientset *kubernetes.Clientset, job *batchv1.Job) ([]string, error)

func GetNodeIP

func GetNodeIP(ctx context.Context, cl *kubernetes.Clientset,
	nodeName string) (string, error)

func GetNodeIPs

func GetNodeIPs(clientset *kubernetes.Clientset) ([]string, error)

func GetNodeIPsForDaemonSet

func GetNodeIPsForDaemonSet(clientset *kubernetes.Clientset, daemonSetName, namespace string) ([]string, error)

func GetNonAMDGpuWorker

func GetNonAMDGpuWorker(cl *kubernetes.Clientset) []v1.Node

func GetPodNamesFromJob

func GetPodNamesFromJob(clientset *kubernetes.Clientset, job *batchv1.Job) ([]string, error)

func GetRebootPod

func GetRebootPod(nodeName string) *v1.Pod

func GetRocmInfo

func GetRocmInfo(name string) (string, error)

func GetServiceEndpoints

func GetServiceEndpoints(clientset *kubernetes.Clientset, serviceName, namespace string) ([]string, error)

func GetWorkerNodes

func GetWorkerNodes(cl *kubernetes.Clientset) []*v1.Node

func HandleNodesReboot

func HandleNodesReboot(ctx context.Context, cl *kubernetes.Clientset, nodes []v1.Node) error

func IsJSONParsable

func IsJSONParsable(s string) bool

func IsNodeHealthy

func IsNodeHealthy(cl *kubernetes.Clientset, nodeip string) error

func ListGpuDrivers

func ListGpuDrivers(name string) (string, error)

func ListRocmPods

func ListRocmPods(ctx context.Context, cl *kubernetes.Clientset) ([]string, error)

func ListRocmPodsByNodeNames

func ListRocmPodsByNodeNames(ctx context.Context,
	workerNodeNames []string) []string

func NFDWorkerName

func NFDWorkerName(isOpenshift bool) string

func NodeLabellerName

func NodeLabellerName(cfgName string) string

func NodeTaint added in v1.2.2

func NodeTaint(cl *kubernetes.Clientset, nodeName string) error

func PatchKMMDeploymentWithCIENVFlag

func PatchKMMDeploymentWithCIENVFlag(cl *kubernetes.Clientset) error

func PatchOperatorControllerDeploymentWithCIENVFlag

func PatchOperatorControllerDeploymentWithCIENVFlag(cl *kubernetes.Clientset) error

func RebootNode

func RebootNode(cl *kubernetes.Clientset, nodeip string) error

func RebootNodeWithWait

func RebootNodeWithWait(ctx context.Context, cl *kubernetes.Clientset,
	nodeName string) error

func RebootNodesWithWait

func RebootNodesWithWait(ctx context.Context, cl *kubernetes.Clientset, nodes []v1.Node) error

func Retry

func Retry(f func() error, timeout time.Duration, period time.Duration) error

func RunCommand

func RunCommand(command string)

func RunCommandOnNode

func RunCommandOnNode(ctx context.Context, cl *kubernetes.Clientset, nodeName, command string) (string, error)

func SetGPUHealthOnNode

func SetGPUHealthOnNode(cl *kubernetes.Clientset, ns, gpuid, health, nodeName string) error

func SetupAccessKeysOnMinioServer added in v1.2.2

func SetupAccessKeysOnMinioServer(ns, pod, container, accessKey, secretKey string)

func SplitYAML

func SplitYAML(data []byte) [][]byte

func VerifyROCMPODResourceCount

func VerifyROCMPODResourceCount(ctx context.Context, cl *kubernetes.Clientset,
	gpuReqCount int) error

Types

type UserRequest

type UserRequest struct {
	Command string `json:"command"`
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL