Documentation
¶
Index ¶
Constants ¶
const (
// Name is the backend name.
Name = "vllm"
)
Variables ¶
var (
	// ErrPlatformNotSupported indicates the platform is not supported.
	ErrPlatformNotSupported = errors.New("vllm-metal is only available on macOS ARM64")
)
var ErrorNotFound = errors.New("vLLM binary not found")
Functions ¶
func GetMaxModelLen ¶
func GetMaxModelLen(modelCfg types.ModelConfig, backendCfg *inference.BackendConfiguration) *int32
GetMaxModelLen returns the max model length (context size) from model config or backend config. Model config takes precedence over backend config. Returns nil if neither is specified (vLLM will auto-derive from model).
func NeedsDeferredInstall ¶ added in v1.1.1
func NeedsDeferredInstall() bool
NeedsDeferredInstall reports whether vllm on the current platform requires deferred (on-demand) installation.
func New ¶
func New(log logging.Logger, modelManager *models.Manager, serverLog logging.Logger, opts Options) (inference.Backend, error)
New creates the appropriate vLLM backend for the current platform. On macOS ARM64, it returns the vllm-metal backend; on Linux, the standard vLLM backend. On unsupported platforms, the returned backend's Install/Run methods return errors.
Types ¶
type Config ¶
type Config struct {
// Args are the base arguments that are always included.
Args []string
}
Config is the configuration for the vLLM backend.
func NewDefaultVLLMConfig ¶
func NewDefaultVLLMConfig() *Config
NewDefaultVLLMConfig creates a new Config with default values.
func (*Config) GetArgs ¶
func (c *Config) GetArgs(bundle types.ModelBundle, socket string, mode inference.BackendMode, config *inference.BackendConfiguration) ([]string, error)
GetArgs implements BackendConfig.GetArgs.
type Options ¶ added in v1.1.1
type Options struct {
Config *Config // Linux-only: extra vllm args (nil = defaults)
LinuxBinaryPath string // Linux: custom vllm binary path
MetalPythonPath string // macOS ARM64: custom python path
}
Options holds the configuration for the unified vLLM backend constructor.