Documentation
¶
Index ¶
- Constants
- Variables
- func BuildAutoscalingPolicy(autoscalingConfig *workload.AutoscalingPolicySpec, ...) *workload.AutoscalingPolicy
- func BuildModelRoute(model *workload.ModelBooster) *networking.ModelRoute
- func BuildModelServer(model *workload.ModelBooster) ([]*networking.ModelServer, error)
- func BuildModelServing(model *workload.ModelBooster) (*workload.ModelServing, error)
- func BuildPolicyBindingMeta(spec *workload.AutoscalingPolicyBindingSpec, model *workload.ModelBooster, ...) *metav1.ObjectMeta
- func BuildScalingPolicyBinding(model *workload.ModelBooster, backend *workload.ModelBackend, name string) *workload.AutoscalingPolicyBinding
- func BuildScalingPolicyBindingSpec(backend *workload.ModelBackend, name string) *workload.AutoscalingPolicyBindingSpec
- func GetCachePath(path string) string
- func GetMountPath(modelURI string) string
Constants ¶
View Source
const ( CacheURIPrefixPVC = "pvc://" CacheURIPrefixHostPath = "hostpath://" URIPrefixSeparator = "://" VllmTemplatePath = "templates/vllm.yaml" VllmDisaggregatedTemplatePath = "templates/vllm-pd.yaml" VllmMultiNodeServingScriptPath = "examples/online_serving/multi-node-serving.sh" )
Variables ¶
View Source
var VLLMKvConnectorType = map[string]networking.KVConnectorType{ "MooncakeConnector": networking.ConnectorTypeMoonCake, "NixlConnector": networking.ConnectorTypeNIXL, "LMCacheConnectorV1": networking.ConnectorTypeLMCache, }
Functions ¶
func BuildAutoscalingPolicy ¶
func BuildAutoscalingPolicy(autoscalingConfig *workload.AutoscalingPolicySpec, model *workload.ModelBooster, backendName string) *workload.AutoscalingPolicy
func BuildModelRoute ¶
func BuildModelRoute(model *workload.ModelBooster) *networking.ModelRoute
func BuildModelServer ¶
func BuildModelServer(model *workload.ModelBooster) ([]*networking.ModelServer, error)
BuildModelServer creates a slice of ModelServer objects for the given model, one ModelServer per model backend.
func BuildModelServing ¶
func BuildModelServing(model *workload.ModelBooster) (*workload.ModelServing, error)
BuildModelServing creates a ModelServing object based on the model's backend.
func BuildPolicyBindingMeta ¶
func BuildPolicyBindingMeta(spec *workload.AutoscalingPolicyBindingSpec, model *workload.ModelBooster, backendName string, name string) *metav1.ObjectMeta
func BuildScalingPolicyBinding ¶
func BuildScalingPolicyBinding(model *workload.ModelBooster, backend *workload.ModelBackend, name string) *workload.AutoscalingPolicyBinding
func BuildScalingPolicyBindingSpec ¶
func BuildScalingPolicyBindingSpec(backend *workload.ModelBackend, name string) *workload.AutoscalingPolicyBindingSpec
func GetCachePath ¶
GetCachePath extracts the path that follows "://" in the given string. For example, for "pvc://my-pvc", it returns "/my-pvc".
func GetMountPath ¶
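GetMountPath returns the mount path for the given ModelBackend in the format "/<backend.Name>". (Note: the function's signature takes a modelURI string rather than a ModelBackend, so this description may be out of date; confirm against the implementation.)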
Types ¶
This section is empty.
Click to show internal directories.
Click to hide internal directories.