config

package
v1.35.1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jul 1, 2025 License: Apache-2.0 Imports: 11 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
// MockDeployment is a sample single-replica Deployment named
// "pytorch-example" in the "tensor-fusion" namespace. Its pod template
// carries the "tensor-fusion.ai/enabled" label and a set of
// "tensor-fusion.ai/*" annotations, and runs one container named "python".
//
// NOTE(review): the exact meaning of each annotation is defined by the
// code that consumes them, not here — confirm against that code before
// changing values.
var MockDeployment = &appsv1.Deployment{
	ObjectMeta: metav1.ObjectMeta{
		Name:      "pytorch-example",
		Namespace: "tensor-fusion",
		Labels: map[string]string{
			"app":                      "pytorch-example",
			"tensor-fusion.ai/enabled": "true",
		},
	},
	Spec: appsv1.DeploymentSpec{
		Replicas: ptr.To[int32](1),
		// The selector matches the "app" label set on the pod template below.
		Selector: &metav1.LabelSelector{
			MatchLabels: map[string]string{
				"app": "pytorch-example",
			},
		},
		Template: corev1.PodTemplateSpec{
			ObjectMeta: metav1.ObjectMeta{
				Labels: map[string]string{
					"app":                      "pytorch-example",
					"tensor-fusion.ai/enabled": "true",
				},
				Annotations: map[string]string{
					"tensor-fusion.ai/generate-workload": "true",
					"tensor-fusion.ai/gpupool":           "mock",
					// Same value as the container name declared in Spec.Containers.
					"tensor-fusion.ai/inject-container": "python",
					"tensor-fusion.ai/replicas":         "1",
					"tensor-fusion.ai/tflops-limit":     "10",
					"tensor-fusion.ai/tflops-request":   "10",
					"tensor-fusion.ai/vram-limit":       "1Gi",
					"tensor-fusion.ai/vram-request":     "1Gi",
					"tensor-fusion.ai/workload":         "pytorch-example",
				},
			},
			Spec: corev1.PodSpec{
				Containers: []corev1.Container{
					{
						Name:  "python",
						Image: "pytorch/pytorch:2.4.1-cuda12.1-cudnn9-runtime",
						// With "sh -c", only the first operand is the command
						// string; any further operands become $0, $1, ... .
						// The whole command must therefore be a single
						// argument, otherwise "sleep" runs without a duration
						// and exits immediately with an error.
						Command: []string{"sh", "-c", "sleep 1d"},
					},
				},
			},
		},
	},
}
View Source
// MockGPUPoolSpec is a GPUPoolSpec fixture intended for unit testing.
//
// It populates four sections: CapacityConfig (oversubscription ratio),
// NodeManagerConfig (node selector and rolling-update policy),
// ComponentConfig (hypervisor, node-discovery, worker and client settings)
// and QosConfig (two QoS levels with pricing).
//
// The raw pod templates and patches are produced with json.Marshal while
// this variable is initialized.
// NOTE(review): lo.Must presumably panics if marshaling fails, which would
// happen at package initialization — confirm against the lo package docs.
var MockGPUPoolSpec = &tfv1.GPUPoolSpec{
	CapacityConfig: &tfv1.CapacityConfig{
		Oversubscription: &tfv1.Oversubscription{
			// NOTE(review): unit of this ratio (percent vs. multiplier) is not
			// visible here — confirm against the tfv1.Oversubscription docs.
			TFlopsOversellRatio: 2000,
		},
	},
	NodeManagerConfig: &tfv1.NodeManagerConfig{
		// Selects nodes whose "mock-label" label is in the set {"true"}.
		NodeSelector: &corev1.NodeSelector{
			NodeSelectorTerms: []corev1.NodeSelectorTerm{
				{
					MatchExpressions: []corev1.NodeSelectorRequirement{
						{
							Key:      "mock-label",
							Operator: "In",
							Values:   []string{"true"},
						},
					},
				},
			},
		},
		NodePoolRollingUpdatePolicy: &tfv1.NodeRollingUpdatePolicy{
			AutoUpdate:        ptr.To(false),
			BatchPercentage:   25,
			BatchInterval:     "10m",
			MaxDuration:       "10m",
			MaintenanceWindow: tfv1.MaintenanceWindow{},
		},
	},
	ComponentConfig: &tfv1.ComponentConfig{
		// Each component below uses a placeholder busybox container that
		// just runs "sleep infinity".
		Hypervisor: &tfv1.HypervisorConfig{
			PodTemplate: &runtime.RawExtension{
				Raw: lo.Must(json.Marshal(
					corev1.PodTemplate{
						Template: corev1.PodTemplateSpec{
							Spec: corev1.PodSpec{
								RestartPolicy: corev1.RestartPolicyOnFailure,
								Containers: []corev1.Container{
									{
										Name:    "tensorfusion-hypervisor",
										Image:   "busybox:stable-glibc",
										Command: []string{"sleep", "infinity"},
									},
								},
							},
						},
					},
				)),
			},
		},
		NodeDiscovery: &tfv1.NodeDiscoveryConfig{
			PodTemplate: &runtime.RawExtension{
				Raw: lo.Must(json.Marshal(
					corev1.PodTemplate{
						Template: corev1.PodTemplateSpec{
							Spec: corev1.PodSpec{
								RestartPolicy:                 corev1.RestartPolicyOnFailure,
								TerminationGracePeriodSeconds: ptr.To[int64](0),
								Containers: []corev1.Container{
									{
										Name:    "tensorfusion-node-discovery",
										Image:   "busybox:stable-glibc",
										Command: []string{"sleep", "infinity"},
									},
								},
							},
						},
					},
				)),
			},
		},
		Worker: &tfv1.WorkerConfig{
			PodTemplate: &runtime.RawExtension{
				Raw: lo.Must(json.Marshal(
					corev1.PodTemplate{
						Template: corev1.PodTemplateSpec{
							Spec: corev1.PodSpec{
								TerminationGracePeriodSeconds: ptr.To[int64](0),
								Containers: []corev1.Container{
									{
										Name:    "tensorfusion-worker",
										Image:   "busybox:stable-glibc",
										Command: []string{"sleep", "infinity"},
									},
								},
							},
						},
					},
				)),
			},
		},
		Client: &tfv1.ClientConfig{
			OperatorEndpoint: "http://localhost:8080",
			// Pod-level patch: a single "inject-lib" init container.
			PatchToPod: &runtime.RawExtension{
				Raw: lo.Must(json.Marshal(map[string]any{
					"spec": map[string]any{
						"initContainers": []corev1.Container{
							{
								Name:  "inject-lib",
								Image: "busybox:stable-glibc",
							},
						},
					},
				})),
			},
			// Container-level patch: sets the LD_PRELOAD environment variable.
			PatchToContainer: &runtime.RawExtension{
				Raw: lo.Must(json.Marshal(map[string]any{
					"env": []corev1.EnvVar{
						{
							Name:  "LD_PRELOAD",
							Value: "tensorfusion.so",
						},
					},
				})),
			},
		},
	},
	QosConfig: &tfv1.QosConfig{
		Definitions: []tfv1.QosDefinition{
			{
				Name: constants.QoSLevelMedium,
			},
			{
				Name: constants.QoSLevelHigh,
			},
		},
		DefaultQoS: constants.QoSLevelMedium,
		// The two pricing entries share the same request prices and differ
		// only in LimitsOverRequestsChargingRatio ("0.5" vs "0.8").
		Pricing: []tfv1.QosPricing{
			{
				Qos: constants.QoSLevelMedium,
				Requests: tfv1.GPUResourcePricingUnit{
					PerFP16TFlopsPerHour: "2",
					PerGBOfVRAMPerHour:   "1",
				},
				LimitsOverRequestsChargingRatio: "0.5",
			},
			{
				Qos: constants.QoSLevelHigh,
				Requests: tfv1.GPUResourcePricingUnit{
					PerFP16TFlopsPerHour: "2",
					PerGBOfVRAMPerHour:   "1",
				},
				LimitsOverRequestsChargingRatio: "0.8",
			},
		},
	},
}

This value is intended for use in unit tests.

Functions

func MockGpuInfo added in v1.24.0

func MockGpuInfo() *[]GpuInfo

Types

type GPUFitConfig added in v1.35.0

// GPUFitConfig groups one integer limit and two floating-point weights,
// all of which are (de)serialized as JSON under the keys given in the tags.
// NOTE(review): presumably consumed by a GPU-fit scoring/placement step —
// confirm against callers.
type GPUFitConfig struct {
	// MaxWorkerPerNode is stored under the JSON key "maxWorkerPerNode".
	// NOTE(review): the name suggests a per-node worker cap — confirm.
	MaxWorkerPerNode int `json:"maxWorkerPerNode"`

	// VramWeight and TflopsWeight are stored under "vramWeight" and
	// "tflopsWeight". NOTE(review): expected range/normalization of the
	// weights is not visible here — confirm.
	VramWeight   float64 `json:"vramWeight"`
	TflopsWeight float64 `json:"tflopsWeight"`
}

type GPUNetworkTopologyAwareConfig added in v1.35.0

// GPUNetworkTopologyAwareConfig holds a single integer setting that is
// (de)serialized as JSON.
type GPUNetworkTopologyAwareConfig struct {
	// TotalIntranetBandWidthGBps is stored under the JSON key
	// "totalIntranetBandWidthGBps".
	// NOTE(review): the name suggests a bandwidth figure in GB/s — confirm
	// the unit against callers.
	TotalIntranetBandWidthGBps int64 `json:"totalIntranetBandWidthGBps"`
}

type GlobalConfig added in v1.34.0

// GlobalConfig holds settings that are (de)serialized as YAML under the
// keys given in the struct tags.
type GlobalConfig struct {
	// MetricsTTL is stored under the YAML key "metricsTTL" as a string.
	// NOTE(review): presumably a duration-like value — the accepted format
	// is not visible here; confirm against the code that parses it.
	MetricsTTL string `yaml:"metricsTTL"`
	// AlertRules is the list of alert.Rule entries stored under "alertRules".
	AlertRules []alert.Rule `yaml:"alertRules"`
}

func MockGlobalConfig added in v1.34.0

func MockGlobalConfig() *GlobalConfig

type GpuInfo

// GpuInfo describes one GPU entry; every field is (de)serialized as JSON
// under the key given in its tag.
type GpuInfo struct {
	// Model is stored under the JSON key "model".
	Model string `json:"model"`
	// Vendor is stored under the JSON key "vendor".
	Vendor string `json:"vendor"`
	// CostPerHour is stored under "costPerHour".
	// NOTE(review): currency/unit is not visible here — confirm.
	CostPerHour float64 `json:"costPerHour"`
	// Fp16TFlops is a resource.Quantity stored under "fp16TFlops".
	Fp16TFlops resource.Quantity `json:"fp16TFlops"`
	// FullModelName is stored under the JSON key "fullModelName".
	FullModelName string `json:"fullModelName"`
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL