Documentation ¶
Index ¶
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type BackendDef ¶ added in v1.1.1
type BackendDef struct {
    // Name is the key under which the backend will be registered.
    Name string
    // Init creates the backend. It receives the model manager, which
    // is not yet available when the BackendDef slice is constructed.
    Init func(*models.Manager) (inference.Backend, error)
    // Deferred, when true, skips automatic installation at startup.
    // The backend is installed on first use instead.
    Deferred bool
}
BackendDef describes how to create a single inference backend.
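For backends not covered by DefaultBackendDefs, a BackendDef can be constructed by hand. A minimal sketch, written as if alongside this package; newMockBackend is a hypothetical constructor, not part of the package:

def := BackendDef{
    // The registry key; must not collide with other backends.
    Name: "mock",
    // Init runs during NewService, once the model manager exists.
    Init: func(m *models.Manager) (inference.Backend, error) {
        return newMockBackend(m), nil // hypothetical constructor
    },
    // Install lazily on first use instead of at startup.
    Deferred: true,
}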
func DefaultBackendDefs ¶ added in v1.1.1
func DefaultBackendDefs(cfg BackendsConfig) []BackendDef
DefaultBackendDefs returns BackendDef entries for the configured backends. It always includes llamacpp; MLX and vLLM are included only when the IncludeMLX and IncludeVLLM flags are set.
type BackendsConfig ¶ added in v1.1.1
type BackendsConfig struct {
    // Log is the main logger passed to each backend.
    Log logging.Logger
    // ServerLogFactory creates the server-process logger for a backend.
    // If nil, Log is used directly as the server logger.
    ServerLogFactory func(backendName string) logging.Logger
    // LlamaCpp settings (always included).
    LlamaCppVendoredPath string
    LlamaCppUpdatedPath  string
    LlamaCppConfig       config.BackendConfig
    // Optional backends and their custom server paths.
    IncludeMLX    bool
    MLXPath       string
    IncludeVLLM   bool
    VLLMPath      string
    VLLMMetalPath string
}
BackendsConfig configures which inference backends to create and how.
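A sketch of building the default backend definitions from a BackendsConfig. The logger, paths, and backend config values below are placeholders, not values this package provides:

defs := DefaultBackendDefs(BackendsConfig{
    Log: logger, // an existing logging.Logger
    // Reuse the main logger for server processes; a real factory
    // would typically derive a per-backend logger instead.
    ServerLogFactory: func(backendName string) logging.Logger { return logger },
    // llamacpp is always included; these paths are placeholders.
    LlamaCppVendoredPath: "/path/to/vendored/llama-server",
    LlamaCppUpdatedPath:  "/path/to/updated/llama-server",
    LlamaCppConfig:       llamaCppConfig, // a config.BackendConfig
    // Opt in to MLX; vLLM stays disabled because IncludeVLLM is false.
    IncludeMLX: true,
    MLXPath:    "/path/to/mlx-server",
})
for _, d := range defs {
    fmt.Println(d.Name) // llamacpp plus the enabled optional backends
}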
type NormalizedServeMux ¶
func NewNormalizedServeMux ¶
func NewNormalizedServeMux() *NormalizedServeMux
func NewRouter ¶ added in v1.1.1
func NewRouter(cfg RouterConfig) *NormalizedServeMux
NewRouter builds a NormalizedServeMux with the standard model-runner route structure: models endpoints, scheduler/inference endpoints, path aliases (/v1/, /rerank, /score), Ollama compatibility, and Anthropic compatibility.
func (*NormalizedServeMux) ServeHTTP ¶
func (nm *NormalizedServeMux) ServeHTTP(w http.ResponseWriter, r *http.Request)
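NormalizedServeMux satisfies http.Handler, so it can be handed directly to the standard library server. A minimal sketch; the listen address is an arbitrary example:

mux := NewNormalizedServeMux()
// Routes are normally registered via NewRouter or NewService.
srv := &http.Server{Addr: "127.0.0.1:12434", Handler: mux}
log.Fatal(srv.ListenAndServe())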
type RouterConfig ¶ added in v1.1.1
type RouterConfig struct {
    Log           logging.Logger
    Scheduler     *scheduling.Scheduler
    SchedulerHTTP *scheduling.HTTPHandler
    ModelHandler  *models.HTTPHandler
    ModelManager  *models.Manager
    // AllowedOrigins is forwarded to the Ollama and Anthropic handlers
    // for CORS support. It may be nil.
    AllowedOrigins []string
    // ModelHandlerMiddleware optionally wraps the model handler before
    // registration (e.g. pinata uses this for access restrictions).
    // If nil, the model handler is registered directly.
    ModelHandlerMiddleware func(http.Handler) http.Handler
    // IncludeResponsesAPI enables the OpenAI Responses API compatibility
    // layer, registering it under /responses, /v1/responses, and
    // /engines/responses prefixes. Requires SchedulerHTTP to be set.
    IncludeResponsesAPI bool
    // ExtraRoutes is called after standard routes are registered,
    // allowing callers to add custom routes (root handler, metrics, etc.).
    // It may be nil.
    ExtraRoutes func(*NormalizedServeMux)
}
RouterConfig holds the dependencies needed to build the standard model-runner HTTP route structure.
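A sketch of building the standard route structure directly with NewRouter. It assumes the scheduler, handlers, and manager were constructed elsewhere; most callers get the same wiring from NewService, which populates Service.Router:

router := NewRouter(RouterConfig{
    Log:            logger,
    Scheduler:      sched,
    SchedulerHTTP:  schedHTTP,
    ModelHandler:   modelHandler,
    ModelManager:   modelManager,
    AllowedOrigins: []string{"http://localhost:3000"}, // optional CORS origins
    // The Responses API layer requires SchedulerHTTP, which is set above.
    IncludeResponsesAPI: true,
    // Add routes the standard structure does not cover.
    ExtraRoutes: func(mux *NormalizedServeMux) {
        // e.g. register a root or metrics handler here.
    },
})
_ = router // typically passed to an *http.Server as its Handler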
type Service ¶ added in v1.1.1
type Service struct {
    ModelManager  *models.Manager
    ModelHandler  *models.HTTPHandler
    Scheduler     *scheduling.Scheduler
    SchedulerHTTP *scheduling.HTTPHandler
    Router        *NormalizedServeMux
    Backends      map[string]inference.Backend
}
Service is the assembled inference service stack.
func NewService ¶ added in v1.1.1
func NewService(cfg ServiceConfig) (*Service, error)
NewService wires up the full inference service stack from the given configuration and returns the assembled Service.
type ServiceConfig ¶ added in v1.1.1
type ServiceConfig struct {
    Log          logging.Logger
    ClientConfig models.ClientConfig
    // Backends lists the backends to initialize. Each Init function
    // is called with the model manager during NewService.
    Backends []BackendDef
    // OnBackendError is called when a backend Init returns an error.
    // If nil, a warning is logged and the backend is skipped.
    OnBackendError func(name string, err error)
    // DefaultBackendName is the key used to look up the default backend
    // (typically llamacpp.Name).
    DefaultBackendName string
    // HTTPClient is used by the scheduler for backend downloads and
    // health checks.
    HTTPClient *http.Client
    // MetricsTracker tracks inference metrics.
    MetricsTracker *metrics.Tracker
    // AllowedOrigins is forwarded to model, scheduler, Ollama, and
    // Anthropic handlers for CORS support. It may be nil.
    AllowedOrigins []string
    // ModelHandlerMiddleware optionally wraps the model handler before
    // route registration (e.g. for access restrictions).
    ModelHandlerMiddleware func(http.Handler) http.Handler
    // IncludeResponsesAPI enables the OpenAI Responses API compatibility
    // layer in the router.
    IncludeResponsesAPI bool
    // ExtraRoutes is called after the standard routes are registered.
    // The Service fields (except Router) are fully populated when this
    // is called, so the callback can reference them.
    ExtraRoutes func(*NormalizedServeMux, *Service)
}
ServiceConfig holds the parameters needed to build the full inference service stack: model manager, model handler, scheduler, and router.
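An end-to-end sketch: assemble backend definitions, wire the stack with NewService, and serve the router. The logger, client configuration, metrics tracker, paths, and address are placeholders, and llamacpp.Name is referenced as the field docs suggest (its import is omitted):

svc, err := NewService(ServiceConfig{
    Log:          logger,
    ClientConfig: clientCfg, // a models.ClientConfig
    Backends: DefaultBackendDefs(BackendsConfig{
        Log:                  logger,
        LlamaCppVendoredPath: "/path/to/llama-server", // placeholder
        LlamaCppConfig:       llamaCppConfig,          // placeholder
    }),
    // OnBackendError is left nil: failing backends are logged and skipped.
    DefaultBackendName: llamacpp.Name,
    HTTPClient:         http.DefaultClient,
    MetricsTracker:     tracker, // a *metrics.Tracker
})
if err != nil {
    return fmt.Errorf("building inference service: %w", err)
}
// svc.Router carries the full route structure built by NewRouter.
return http.ListenAndServe("127.0.0.1:12434", svc.Router)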