assertions

package
v0.0.0-...-f707090 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Aug 8, 2025 License: MIT Imports: 9 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func AlertCount

func AlertCount(t *testing.T, client monitoring.AlertClient, expectedCount int, labels map[string]string)

AlertCount checks whether the number of alerts matches the expected value

func AlertDuration

func AlertDuration(t *testing.T, client monitoring.AlertClient, name string, minDuration, maxDuration time.Duration, labels map[string]string)

AlertDuration checks whether the alert fired within the expected duration

func AlertFiring

func AlertFiring(t *testing.T, client monitoring.AlertClient, name string, labels map[string]string)

AlertFiring checks whether the alert is in the firing state

func AlertNotExists

func AlertNotExists(t *testing.T, client monitoring.AlertClient, name string, labels map[string]string)

AlertNotExists checks whether the alert does not exist

func AlertResolved

func AlertResolved(t *testing.T, client monitoring.AlertClient, name string, labels map[string]string)

AlertResolved checks whether the alert has been resolved

func AlertSeverity

func AlertSeverity(t *testing.T, client monitoring.AlertClient, name, expectedSeverity string, labels map[string]string)

AlertSeverity checks whether the alert has the expected severity

func AllNodesSynced

func AllNodesSynced(t *testing.T, nodes []*k8s.Node, expectedStep int)

AllNodesSynced checks if all nodes in a distributed training setup are synchronized

func EventOccurred

func EventOccurred(t *testing.T, k8sClient k8s.K8sClient, objectRef *k8s.ObjectReference, eventType, reason string)

EventOccurred checks if a specific event occurred

func GPUAllocated

func GPUAllocated(t *testing.T, gpuManager resource.GPUManager, gpuID, jobID string)

GPUAllocated checks whether the GPU is allocated to a specific job

func GPUAvailable

func GPUAvailable(t *testing.T, gpuManager resource.GPUManager, nodeName string, expectedCount int)

GPUAvailable checks whether the number of available GPUs on the node matches the expected count

func GPUReleased

func GPUReleased(t *testing.T, gpuManager resource.GPUManager, gpuID string)

GPUReleased checks whether the GPU has been released (is unallocated)

func JobCompleted

func JobCompleted(t *testing.T, job *k8s.PyTorchJob)

JobCompleted checks if a job completed successfully

func JobCreated

func JobCreated(t *testing.T, k8sClient k8s.K8sClient, jobName string)

JobCreated checks if a job was created successfully

func JobDeleted

func JobDeleted(t *testing.T, k8sClient k8s.K8sClient, jobName string)

JobDeleted checks if a job was deleted successfully

func JobDuration

func JobDuration(t *testing.T, job *k8s.PyTorchJob, minDuration, maxDuration time.Duration)

JobDuration checks if job duration is within expected range

func JobFailed

func JobFailed(t *testing.T, job *k8s.PyTorchJob, expectedReason string)

JobFailed checks if a job failed as expected

func JobRestarted

func JobRestarted(t *testing.T, job *k8s.PyTorchJob)

JobRestarted checks if a job was restarted

func JobStatusTransition

func JobStatusTransition(t *testing.T, job *k8s.PyTorchJob, expectedPhases []k8s.JobPhase)

JobStatusTransition checks whether the job's status transitions match the expected phases

func LogContainsMetric

func LogContainsMetric(t *testing.T, logContent, metricName string, expectedValue, tolerance float64)

LogContainsMetric checks if log content contains a specific metric with expected value

func MemoryUsage

func MemoryUsage(t *testing.T, gpu *resource.GPUInfo, maxUsagePercent float64)

MemoryUsage checks if GPU memory usage is within expected range

func MetricCount

func MetricCount(t *testing.T, client monitoring.MetricClient, name string, expectedCount int, labels map[string]string)

MetricCount checks whether the number of metrics matches the expected value

func MetricExists

func MetricExists(t *testing.T, client monitoring.MetricClient, name string, labels map[string]string)

MetricExists checks whether the metric exists

func MetricNotExists

func MetricNotExists(t *testing.T, client monitoring.MetricClient, name string, labels map[string]string)

MetricNotExists checks whether the metric does not exist

func MetricRecent

func MetricRecent(t *testing.T, client monitoring.MetricClient, name string, within time.Duration, labels map[string]string)

MetricRecent checks whether the metric was recorded recently (within the specified time range)

func MetricRecorded

func MetricRecorded(t *testing.T, client monitoring.MetricClient, name string, expectedValue float64, labels map[string]string)

MetricRecorded checks whether the metric recorded the expected value

func MetricValueInRange

func MetricValueInRange(t *testing.T, client monitoring.MetricClient, name string, minValue, maxValue float64, labels map[string]string)

MetricValueInRange checks whether the metric value is within the expected range

func ModelFileValid

func ModelFileValid(t *testing.T, storageClient storage.StorageClient, bucket, key string, expectedSize int64, expectedHash string)

ModelFileValid checks if a model file is valid (size and hash)

func NodeReady

func NodeReady(t *testing.T, k8sClient k8s.K8sClient, nodeName string)

NodeReady checks if a node is in ready state

func PodScheduled

func PodScheduled(t *testing.T, k8sClient k8s.K8sClient, podName, expectedNode string)

PodScheduled checks if a pod was scheduled on the expected node

func ResourceUtilization

func ResourceUtilization(t *testing.T, gpu *resource.GPUInfo, minUtilization, maxUtilization int)

ResourceUtilization checks if GPU utilization is within expected range

func ResourcesAllocated

func ResourcesAllocated(t *testing.T, job *k8s.PyTorchJob, expectedCPU, expectedMemory, expectedGPU string)

ResourcesAllocated checks whether resources are allocated as expected

func TemperatureSafe

func TemperatureSafe(t *testing.T, gpu *resource.GPUInfo, maxTemp int)

TemperatureSafe checks if GPU temperature is within safe range

Types

This section is empty.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL