Documentation
¶
Index ¶
Constants ¶
This section is empty.
Variables ¶
View Source
// MockDeployment is a sample single-replica Deployment named
// "pytorch-example" in the "tensor-fusion" namespace. Its pod template is
// labelled "tensor-fusion.ai/enabled": "true" and carries the
// tensor-fusion.ai/* annotations (pool, workload name, injected container,
// TFLOPS and VRAM requests/limits) set below.
var MockDeployment = &appsv1.Deployment{
	ObjectMeta: metav1.ObjectMeta{
		Name:      "pytorch-example",
		Namespace: "tensor-fusion",
		Labels: map[string]string{
			"app":                      "pytorch-example",
			"tensor-fusion.ai/enabled": "true",
		},
	},
	Spec: appsv1.DeploymentSpec{
		Replicas: ptr.To[int32](1),
		// The selector matches only on "app"; the pod template below
		// carries that label.
		Selector: &metav1.LabelSelector{
			MatchLabels: map[string]string{
				"app": "pytorch-example",
			},
		},
		Template: corev1.PodTemplateSpec{
			ObjectMeta: metav1.ObjectMeta{
				Labels: map[string]string{
					"app":                      "pytorch-example",
					"tensor-fusion.ai/enabled": "true",
				},
				Annotations: map[string]string{
					"tensor-fusion.ai/generate-workload": "true",
					"tensor-fusion.ai/gpupool":           "mock",
					"tensor-fusion.ai/inject-container":  "python",
					"tensor-fusion.ai/replicas":          "1",
					"tensor-fusion.ai/tflops-limit":      "10",
					"tensor-fusion.ai/tflops-request":    "10",
					"tensor-fusion.ai/vram-limit":        "1Gi",
					"tensor-fusion.ai/vram-request":      "1Gi",
					"tensor-fusion.ai/workload":          "pytorch-example",
				},
			},
			Spec: corev1.PodSpec{
				Containers: []corev1.Container{
					{
						// The name matches the "inject-container"
						// annotation value above.
						Name:  "python",
						Image: "pytorch/pytorch:2.4.1-cuda12.1-cudnn9-runtime",
						// `sh -c` takes the whole command as ONE string;
						// any further argument becomes $0, $1, ... With
						// "sleep" and "1d" as separate elements the shell
						// would run a bare `sleep` (no operand) and exit
						// with an error, so the duration must be part of
						// the command string.
						Command: []string{"sh", "-c", "sleep 1d"},
					},
				},
			},
		},
	},
}
View Source
// MockGPUPoolSpec is a fully populated GPUPoolSpec fixture. Every component
// pod template uses the "busybox:stable-glibc" image running
// `sleep infinity`, and the raw JSON payloads are produced at package
// initialisation with lo.Must(json.Marshal(...)), which panics if
// marshalling fails.
var MockGPUPoolSpec = &tfv1.GPUPoolSpec{
	CapacityConfig: &tfv1.CapacityConfig{
		Oversubscription: &tfv1.Oversubscription{
			TFlopsOversellRatio: 2000,
		},
	},

	NodeManagerConfig: &tfv1.NodeManagerConfig{
		// Selects nodes whose "mock-label" label is "true".
		NodeSelector: &corev1.NodeSelector{
			NodeSelectorTerms: []corev1.NodeSelectorTerm{
				{
					MatchExpressions: []corev1.NodeSelectorRequirement{
						{
							Key:      "mock-label",
							Operator: corev1.NodeSelectorOpIn,
							Values:   []string{"true"},
						},
					},
				},
			},
		},
		NodePoolRollingUpdatePolicy: &tfv1.NodeRollingUpdatePolicy{
			AutoUpdate:        ptr.To(false),
			BatchPercentage:   25,
			BatchInterval:     "10m",
			MaxDuration:       "10m",
			MaintenanceWindow: tfv1.MaintenanceWindow{},
		},
	},

	ComponentConfig: &tfv1.ComponentConfig{
		// Hypervisor pod template: restart on failure, single sleeping container.
		Hypervisor: &tfv1.HypervisorConfig{
			PodTemplate: &runtime.RawExtension{
				Raw: lo.Must(json.Marshal(corev1.PodTemplate{
					Template: corev1.PodTemplateSpec{
						Spec: corev1.PodSpec{
							RestartPolicy: corev1.RestartPolicyOnFailure,
							Containers: []corev1.Container{
								{
									Name:    "tensorfusion-hypervisor",
									Image:   "busybox:stable-glibc",
									Command: []string{"sleep", "infinity"},
								},
							},
						},
					},
				})),
			},
		},

		// Node discovery pod template: restart on failure, zero-second
		// termination grace period.
		NodeDiscovery: &tfv1.NodeDiscoveryConfig{
			PodTemplate: &runtime.RawExtension{
				Raw: lo.Must(json.Marshal(corev1.PodTemplate{
					Template: corev1.PodTemplateSpec{
						Spec: corev1.PodSpec{
							RestartPolicy:                 corev1.RestartPolicyOnFailure,
							TerminationGracePeriodSeconds: ptr.To(int64(0)),
							Containers: []corev1.Container{
								{
									Name:    "tensorfusion-node-discovery",
									Image:   "busybox:stable-glibc",
									Command: []string{"sleep", "infinity"},
								},
							},
						},
					},
				})),
			},
		},

		// Worker pod template: zero-second termination grace period, no
		// explicit restart policy.
		Worker: &tfv1.WorkerConfig{
			PodTemplate: &runtime.RawExtension{
				Raw: lo.Must(json.Marshal(corev1.PodTemplate{
					Template: corev1.PodTemplateSpec{
						Spec: corev1.PodSpec{
							TerminationGracePeriodSeconds: ptr.To(int64(0)),
							Containers: []corev1.Container{
								{
									Name:    "tensorfusion-worker",
									Image:   "busybox:stable-glibc",
									Command: []string{"sleep", "infinity"},
								},
							},
						},
					},
				})),
			},
		},

		Client: &tfv1.ClientConfig{
			OperatorEndpoint: "http://localhost:8080",
			// Pod-level patch payload: a "spec" object holding one init
			// container named "inject-lib".
			PatchToPod: &runtime.RawExtension{
				Raw: lo.Must(json.Marshal(map[string]any{
					"spec": map[string]any{
						"initContainers": []corev1.Container{
							{
								Name:  "inject-lib",
								Image: "busybox:stable-glibc",
							},
						},
					},
				})),
			},
			// Container-level patch payload: an "env" list setting
			// LD_PRELOAD to "tensorfusion.so".
			PatchToContainer: &runtime.RawExtension{
				Raw: lo.Must(json.Marshal(map[string]any{
					"env": []corev1.EnvVar{
						{
							Name:  "LD_PRELOAD",
							Value: "tensorfusion.so",
						},
					},
				})),
			},
		},
	},

	// Two QoS levels are defined (medium and high); medium is the default.
	// Both share the same per-unit request prices and differ only in
	// LimitsOverRequestsChargingRatio.
	QosConfig: &tfv1.QosConfig{
		Definitions: []tfv1.QosDefinition{
			{Name: constants.QoSLevelMedium},
			{Name: constants.QoSLevelHigh},
		},
		DefaultQoS: constants.QoSLevelMedium,
		Pricing: []tfv1.QosPricing{
			{
				Qos: constants.QoSLevelMedium,
				Requests: tfv1.GPUResourcePricingUnit{
					PerFP16TFlopsPerHour: "2",
					PerGBOfVRAMPerHour:   "1",
				},
				LimitsOverRequestsChargingRatio: "0.5",
			},
			{
				Qos: constants.QoSLevelHigh,
				Requests: tfv1.GPUResourcePricingUnit{
					PerFP16TFlopsPerHour: "2",
					PerGBOfVRAMPerHour:   "1",
				},
				LimitsOverRequestsChargingRatio: "0.8",
			},
		},
	},
}
This value is intended for use in unit tests.
Functions ¶
func MockGpuInfo ¶ added in v1.24.0
func MockGpuInfo() *[]GpuInfo
Types ¶
type GPUFitConfig ¶ added in v1.35.0
type GPUNetworkTopologyAwareConfig ¶ added in v1.35.0
// GPUNetworkTopologyAwareConfig holds the settings for the
// network-topology-aware GPU configuration.
type GPUNetworkTopologyAwareConfig struct {
	// TotalIntranetBandWidthGBps is encoded under the JSON key
	// "totalIntranetBandWidthGBps".
	// NOTE(review): presumably the total intranet bandwidth in GB/s, going
	// by the field name — confirm against the consumer of this value.
	TotalIntranetBandWidthGBps int64 `json:"totalIntranetBandWidthGBps"`
}
type GlobalConfig ¶ added in v1.34.0
type GlobalConfig struct { MetricsTTL string `yaml:"metricsTTL"` AlertRules []alert.Rule `yaml:"alertRules"` }
func MockGlobalConfig ¶ added in v1.34.0
func MockGlobalConfig() *GlobalConfig
Click to show internal directories.
Click to hide internal directories.