Documentation
¶
Index ¶
- Constants
- type Backend
- type LLAMACPP
- func (l *LLAMACPP) Args(models []*coreapi.OpenModel, involvedRole coreapi.ModelRole) []string
- func (l *LLAMACPP) DefaultCommand() []string
- func (l *LLAMACPP) DefaultResources() inferenceapi.ResourceRequirements
- func (l *LLAMACPP) DefaultVersion() string
- func (l *LLAMACPP) Image(version string) string
- func (l *LLAMACPP) Name() inferenceapi.BackendName
- type SGLANG
- func (s *SGLANG) Args(models []*coreapi.OpenModel, involvedRole coreapi.ModelRole) []string
- func (s *SGLANG) DefaultCommand() []string
- func (s *SGLANG) DefaultResources() inferenceapi.ResourceRequirements
- func (s *SGLANG) DefaultVersion() string
- func (s *SGLANG) Image(version string) string
- func (s *SGLANG) Name() inferenceapi.BackendName
- type SpeculativeBackend
- type VLLM
- func (v *VLLM) Args(models []*coreapi.OpenModel, involvedRole coreapi.ModelRole) []string
- func (v *VLLM) DefaultCommand() []string
- func (v *VLLM) DefaultResources() inferenceapi.ResourceRequirements
- func (v *VLLM) DefaultVersion() string
- func (v *VLLM) Image(version string) string
- func (v *VLLM) Name() inferenceapi.BackendName
Constants ¶
View Source
const (
DEFAULT_BACKEND_PORT = 8080
)
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type Backend ¶
type Backend interface {
// Name returns the inference backend name in this project.
Name() inferenceapi.BackendName
// Image returns the container image for the inference backend.
Image(version string) string
// DefaultVersion returns the default version for the inference backend.
DefaultVersion() string
// DefaultResources returns the default resources set for the container.
DefaultResources() inferenceapi.ResourceRequirements
// DefaultCommand returns the command to start the inference backend.
DefaultCommand() []string
// Args returns the bootstrap arguments to start the backend.
// The second parameter indicates which particular modelRole is involved, such as draft.
Args([]*coreapi.OpenModel, coreapi.ModelRole) []string
}
Backend represents an inference engine, such as vllm.
func SwitchBackend ¶
func SwitchBackend(name inferenceapi.BackendName) Backend
type LLAMACPP ¶
type LLAMACPP struct{}
func (*LLAMACPP) DefaultCommand ¶ added in v0.0.6
func (*LLAMACPP) DefaultResources ¶
func (l *LLAMACPP) DefaultResources() inferenceapi.ResourceRequirements
func (*LLAMACPP) DefaultVersion ¶
func (*LLAMACPP) Name ¶
func (l *LLAMACPP) Name() inferenceapi.BackendName
type SGLANG ¶
type SGLANG struct{}
func (*SGLANG) DefaultCommand ¶ added in v0.0.6
func (*SGLANG) DefaultResources ¶
func (s *SGLANG) DefaultResources() inferenceapi.ResourceRequirements
func (*SGLANG) DefaultVersion ¶
func (*SGLANG) Name ¶
func (s *SGLANG) Name() inferenceapi.BackendName
type SpeculativeBackend ¶ added in v0.0.6
type SpeculativeBackend interface {
// contains filtered or unexported methods
}
SpeculativeBackend represents a backend that supports the speculativeDecoding inference mode.
type VLLM ¶
type VLLM struct{}
func (*VLLM) DefaultCommand ¶ added in v0.0.6
func (*VLLM) DefaultResources ¶
func (v *VLLM) DefaultResources() inferenceapi.ResourceRequirements
func (*VLLM) DefaultVersion ¶
func (*VLLM) Name ¶
func (v *VLLM) Name() inferenceapi.BackendName
Click to show internal directories.
Click to hide internal directories.