Documentation
¶
Index ¶
- Constants
- func AddNodeLabel(cl *kubernetes.Clientset, nodeName string, key string, value string) error
- func CheckDeploymentWithStandardKMMNFD(cl *kubernetes.Clientset, create bool) error
- func CheckGpuLabel(rl v1.ResourceList) bool
- func CheckHelmDeployment(cl *kubernetes.Clientset, ns string, create bool) error
- func CheckHelmOCDeployment(cl *kubernetes.Clientset, create bool) error
- func CheckOCDeploymentWithStandardKMMNFD(cl *kubernetes.Clientset, create bool) error
- func CreateDaemonset(cl *kubernetes.Clientset, ns string, name string, image string, ...) error
- func CreateDaemonsetVerify(ctx context.Context, cl *kubernetes.Clientset, ns string, name string, ...) error
- func CreateMinioService(ctx context.Context, cl *kubernetes.Clientset, ns, hostName string) error
- func CreateOpaqueSecret(ctx context.Context, cl *kubernetes.Clientset, name, ns string, ...) error
- func CreatePod(ctx context.Context, cl *kubernetes.Clientset, ns string, name string, ...) error
- func CreateTLSSecret(ctx context.Context, cl *kubernetes.Clientset, name, ns string, ...) error
- func CreateTempFile(fileName string, data []byte) (*os.File, error)
- func CurlMetrics(endpointIPs []string, token string, port int, secure bool, caCertPath string, ...) error
- func DelDaemonset(cl *kubernetes.Clientset, ns string, name string) error
- func DelRocmPods(ctx context.Context, cl *kubernetes.Clientset) error
- func DelRocmPodsByNodeNames(ctx context.Context, cl *kubernetes.Clientset, workerNodeNames []string) error
- func DeleteMinioService(ctx context.Context, cl *kubernetes.Clientset, ns string)
- func DeleteNodeAppDaemonSet(cl *kubernetes.Clientset) error
- func DeleteNodeLabel(cl *kubernetes.Clientset, nodeName string, key string) error
- func DeleteOpaqueSecret(ctx context.Context, cl *kubernetes.Clientset, name, ns string)
- func DeletePod(ctx context.Context, cl *kubernetes.Clientset, ns string, name string) error
- func DeleteRebootPod(ctx context.Context, cl *kubernetes.Clientset, nodeName string, force bool)
- func DeleteTLSSecret(ctx context.Context, cl *kubernetes.Clientset, name, ns string) error
- func DeleteTempFile(file *os.File) error
- func DeployNodeAppDaemonSet(cl *kubernetes.Clientset) error
- func DeployResourcesFromFile(pathOrURL string, cl *kubernetes.Clientset, apiCl *apiextClient.Clientset, ...) error
- func DeployRocmPods(ctx context.Context, cl *kubernetes.Clientset, res *v1.ResourceRequirements) error
- func DeployRocmPodsByNodeNames(ctx context.Context, cl *kubernetes.Clientset, workerNodeNames []string) error
- func DevicePluginName(cfgName string) string
- func ExecPodCmd(command string, ns string, name string, container string) (string, error)
- func GenerateServiceAccountToken(clientset *kubernetes.Clientset, serviceAccountName, namespace string) (string, error)
- func GetAMDGPUCount(ctx context.Context, cl *kubernetes.Clientset) (map[string]int, error)
- func GetAMDGpuWorker(cl *kubernetes.Clientset, isOpenshift bool) []v1.Node
- func GetClusterIP(clientset *kubernetes.Clientset, serviceName, namespace string) (string, error)
- func GetClusterType(cfg *rest.Config) string
- func GetGpuDriverVersion(name string) (string, error)
- func GetJobLogs(clientset *kubernetes.Clientset, job *batchv1.Job) ([]string, error)
- func GetNodeIP(ctx context.Context, cl *kubernetes.Clientset, nodeName string) (string, error)
- func GetNodeIPs(clientset *kubernetes.Clientset) ([]string, error)
- func GetNodeIPsForDaemonSet(clientset *kubernetes.Clientset, daemonSetName, namespace string) ([]string, error)
- func GetNonAMDGpuWorker(cl *kubernetes.Clientset) []v1.Node
- func GetPodNamesFromJob(clientset *kubernetes.Clientset, job *batchv1.Job) ([]string, error)
- func GetRebootPod(nodeName string) *v1.Pod
- func GetRocmInfo(name string) (string, error)
- func GetServiceEndpoints(clientset *kubernetes.Clientset, serviceName, namespace string) ([]string, error)
- func GetWorkerNodes(cl *kubernetes.Clientset) []*v1.Node
- func HandleNodesReboot(ctx context.Context, cl *kubernetes.Clientset, nodes []v1.Node) error
- func IsJSONParsable(s string) bool
- func IsNodeHealthy(cl *kubernetes.Clientset, nodeip string) error
- func ListGpuDrivers(name string) (string, error)
- func ListRocmPods(ctx context.Context, cl *kubernetes.Clientset) ([]string, error)
- func ListRocmPodsByNodeNames(ctx context.Context, workerNodeNames []string) []string
- func NFDWorkerName(isOpenshift bool) string
- func NodeLabellerName(cfgName string) string
- func NodeTaint(cl *kubernetes.Clientset, nodeName string) error
- func PatchKMMDeploymentWithCIENVFlag(cl *kubernetes.Clientset) error
- func PatchOperatorControllerDeploymentWithCIENVFlag(cl *kubernetes.Clientset) error
- func RebootNode(cl *kubernetes.Clientset, nodeip string) error
- func RebootNodeWithWait(ctx context.Context, cl *kubernetes.Clientset, nodeName string) error
- func RebootNodesWithWait(ctx context.Context, cl *kubernetes.Clientset, nodes []v1.Node) error
- func Retry(f func() error, timeout time.Duration, period time.Duration) error
- func RunCommand(command string)
- func RunCommandOnNode(ctx context.Context, cl *kubernetes.Clientset, nodeName, command string) (string, error)
- func SetGPUHealthOnNode(cl *kubernetes.Clientset, ns, gpuid, health, nodeName string) error
- func SetupAccessKeysOnMinioServer(ns, pod, container, accessKey, secretKey string)
- func SplitYAML(data []byte) [][]byte
- func VerifyROCMPODResourceCount(ctx context.Context, cl *kubernetes.Clientset, gpuReqCount int) error
- type UserRequest
Constants ¶
View Source
const ClusterTypeK8s = "kubernetes"
View Source
const ClusterTypeOpenShift = "openshift"
View Source
const HttpServerPort = "8084"
Variables ¶
This section is empty.
Functions ¶
func AddNodeLabel ¶ added in v1.2.2
func AddNodeLabel(cl *kubernetes.Clientset, nodeName string, key string, value string) error
func CheckDeploymentWithStandardKMMNFD ¶
func CheckDeploymentWithStandardKMMNFD(cl *kubernetes.Clientset, create bool) error
func CheckGpuLabel ¶
func CheckGpuLabel(rl v1.ResourceList) bool
func CheckHelmDeployment ¶
func CheckHelmDeployment(cl *kubernetes.Clientset, ns string, create bool) error
func CheckHelmOCDeployment ¶
func CheckHelmOCDeployment(cl *kubernetes.Clientset, create bool) error
func CheckOCDeploymentWithStandardKMMNFD ¶
func CheckOCDeploymentWithStandardKMMNFD(cl *kubernetes.Clientset, create bool) error
func CreateDaemonset ¶
func CreateDaemonset(cl *kubernetes.Clientset, ns string, name string, image string, matchLabels map[string]string, res *v1.ResourceRequirements) error
func CreateDaemonsetVerify ¶
func CreateMinioService ¶ added in v1.2.2
func CreateMinioService(ctx context.Context, cl *kubernetes.Clientset, ns, hostName string) error
func CreateOpaqueSecret ¶ added in v1.2.2
func CreateTLSSecret ¶
func CurlMetrics ¶
func DelDaemonset ¶
func DelDaemonset(cl *kubernetes.Clientset, ns string, name string) error
func DelRocmPods ¶
func DelRocmPods(ctx context.Context, cl *kubernetes.Clientset) error
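func DelRocmPodsByNodeNames ¶
func DelRocmPodsByNodeNames(ctx context.Context, cl *kubernetes.Clientset, workerNodeNames []string) error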
func DeleteMinioService ¶ added in v1.2.2
func DeleteMinioService(ctx context.Context, cl *kubernetes.Clientset, ns string)
func DeleteNodeAppDaemonSet ¶
func DeleteNodeAppDaemonSet(cl *kubernetes.Clientset) error
func DeleteNodeLabel ¶ added in v1.2.2
func DeleteNodeLabel(cl *kubernetes.Clientset, nodeName string, key string) error
func DeleteOpaqueSecret ¶ added in v1.2.2
func DeleteOpaqueSecret(ctx context.Context, cl *kubernetes.Clientset, name, ns string)
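func DeleteRebootPod ¶
func DeleteRebootPod(ctx context.Context, cl *kubernetes.Clientset, nodeName string, force bool)
func DeleteTLSSecret ¶
func DeleteTLSSecret(ctx context.Context, cl *kubernetes.Clientset, name, ns string) error
func DeleteTempFile ¶
func DeleteTempFile(file *os.File) error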
func DeployNodeAppDaemonSet ¶
func DeployNodeAppDaemonSet(cl *kubernetes.Clientset) error
func DeployResourcesFromFile ¶
func DeployResourcesFromFile(pathOrURL string, cl *kubernetes.Clientset, apiCl *apiextClient.Clientset, create bool) error
func DeployRocmPods ¶
func DeployRocmPods(ctx context.Context, cl *kubernetes.Clientset, res *v1.ResourceRequirements) error
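func DevicePluginName ¶
func DevicePluginName(cfgName string) string
func ExecPodCmd ¶
func ExecPodCmd(command string, ns string, name string, container string) (string, error)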
func GenerateServiceAccountToken ¶
func GenerateServiceAccountToken(clientset *kubernetes.Clientset, serviceAccountName, namespace string) (string, error)
func GetAMDGPUCount ¶
func GetAMDGPUCount(ctx context.Context, cl *kubernetes.Clientset) (map[string]int, error)
func GetAMDGpuWorker ¶
func GetAMDGpuWorker(cl *kubernetes.Clientset, isOpenshift bool) []v1.Node
func GetClusterIP ¶
func GetClusterIP(clientset *kubernetes.Clientset, serviceName, namespace string) (string, error)
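func GetClusterType ¶
func GetClusterType(cfg *rest.Config) string
func GetGpuDriverVersion ¶
func GetGpuDriverVersion(name string) (string, error)
func GetJobLogs ¶
func GetJobLogs(clientset *kubernetes.Clientset, job *batchv1.Job) ([]string, error)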
func GetNodeIPs ¶
func GetNodeIPs(clientset *kubernetes.Clientset) ([]string, error)
func GetNodeIPsForDaemonSet ¶
func GetNodeIPsForDaemonSet(clientset *kubernetes.Clientset, daemonSetName, namespace string) ([]string, error)
func GetNonAMDGpuWorker ¶
func GetNonAMDGpuWorker(cl *kubernetes.Clientset) []v1.Node
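func GetPodNamesFromJob ¶
func GetPodNamesFromJob(clientset *kubernetes.Clientset, job *batchv1.Job) ([]string, error)
func GetRebootPod ¶
func GetRebootPod(nodeName string) *v1.Pod
func GetRocmInfo ¶
func GetRocmInfo(name string) (string, error)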
func GetServiceEndpoints ¶
func GetServiceEndpoints(clientset *kubernetes.Clientset, serviceName, namespace string) ([]string, error)
func GetWorkerNodes ¶
func GetWorkerNodes(cl *kubernetes.Clientset) []*v1.Node
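func HandleNodesReboot ¶
func HandleNodesReboot(ctx context.Context, cl *kubernetes.Clientset, nodes []v1.Node) error
func IsJSONParsable ¶
func IsJSONParsable(s string) bool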
func IsNodeHealthy ¶
func IsNodeHealthy(cl *kubernetes.Clientset, nodeip string) error
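func ListGpuDrivers ¶
func ListGpuDrivers(name string) (string, error)
func ListRocmPods ¶
func ListRocmPods(ctx context.Context, cl *kubernetes.Clientset) ([]string, error)
func ListRocmPodsByNodeNames ¶
func ListRocmPodsByNodeNames(ctx context.Context, workerNodeNames []string) []string
func NFDWorkerName ¶
func NFDWorkerName(isOpenshift bool) string
func NodeLabellerName ¶
func NodeLabellerName(cfgName string) string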
func PatchKMMDeploymentWithCIENVFlag ¶
func PatchKMMDeploymentWithCIENVFlag(cl *kubernetes.Clientset) error
func PatchOperatorControllerDeploymentWithCIENVFlag ¶
func PatchOperatorControllerDeploymentWithCIENVFlag(cl *kubernetes.Clientset) error
func RebootNode ¶
func RebootNode(cl *kubernetes.Clientset, nodeip string) error
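func RebootNodeWithWait ¶
func RebootNodeWithWait(ctx context.Context, cl *kubernetes.Clientset, nodeName string) error
func RebootNodesWithWait ¶
func RebootNodesWithWait(ctx context.Context, cl *kubernetes.Clientset, nodes []v1.Node) error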
func RunCommand ¶
func RunCommand(command string)
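func RunCommandOnNode ¶
func RunCommandOnNode(ctx context.Context, cl *kubernetes.Clientset, nodeName, command string) (string, error)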
func SetGPUHealthOnNode ¶
func SetGPUHealthOnNode(cl *kubernetes.Clientset, ns, gpuid, health, nodeName string) error
func SetupAccessKeysOnMinioServer ¶ added in v1.2.2
func SetupAccessKeysOnMinioServer(ns, pod, container, accessKey, secretKey string)
Types ¶
type UserRequest ¶
type UserRequest struct {
Command string `json:"command"`
}
Click to show internal directories.
Click to hide internal directories.