routing

package
v1.1.1
Published: Feb 19, 2026 License: Apache-2.0 Imports: 17 Imported by: 0

Documentation

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type BackendDef added in v1.1.1

type BackendDef struct {
	// Name is the key under which the backend will be registered.
	Name string
	// Init creates the backend. It receives the model manager, which
	// is not yet available when the BackendDef slice is constructed.
	Init func(*models.Manager) (inference.Backend, error)
	// Deferred, when true, skips automatic installation at startup.
	// The backend is installed on first use instead.
	Deferred bool
}

BackendDef describes how to create a single inference backend.
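
For illustration, a hand-written BackendDef might look like the following sketch. The newCustomBackend constructor is hypothetical, and the imports of the models, inference, and routing packages are assumed to resolve to this module's layout:

// newCustomBackend is a hypothetical Init function; its signature
// matches BackendDef.Init.
func newCustomBackend(m *models.Manager) (inference.Backend, error) {
	// The model manager is passed in here because it does not yet
	// exist when the BackendDef slice is constructed.
	return nil, errors.New("custom backend: not implemented")
}

var defs = []routing.BackendDef{{
	Name:     "custom",         // key under which the backend is registered
	Init:     newCustomBackend, // called with the model manager during NewService
	Deferred: true,             // skip installation at startup; install on first use
}}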

func DefaultBackendDefs added in v1.1.1

func DefaultBackendDefs(cfg BackendsConfig) []BackendDef

DefaultBackendDefs returns BackendDef entries for the configured backends. It always includes llamacpp; MLX and vLLM are included only when IncludeMLX and IncludeVLLM are set.

type BackendsConfig added in v1.1.1

type BackendsConfig struct {
	// Log is the main logger passed to each backend.
	Log logging.Logger

	// ServerLogFactory creates the server-process logger for a backend.
	// If nil, Log is used directly as the server logger.
	ServerLogFactory func(backendName string) logging.Logger

	// LlamaCpp settings (always included).
	LlamaCppVendoredPath string
	LlamaCppUpdatedPath  string
	LlamaCppConfig       config.BackendConfig

	// Optional backends and their custom server paths.
	IncludeMLX bool
	MLXPath    string

	IncludeVLLM   bool
	VLLMPath      string
	VLLMMetalPath string
}

BackendsConfig configures which inference backends to create and how.
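
A minimal sketch of building the default backend definitions from this config. The file paths are hypothetical examples, and log and llamaCfg are assumed to be a logging.Logger and config.BackendConfig constructed by the caller:

cfg := routing.BackendsConfig{
	Log:                  log,
	LlamaCppVendoredPath: "/opt/model-runner/bin/llama-server",     // hypothetical path
	LlamaCppUpdatedPath:  "/var/lib/model-runner/bin/llama-server", // hypothetical path
	LlamaCppConfig:       llamaCfg,
	IncludeMLX:           runtime.GOOS == "darwin", // e.g. enable MLX only on macOS
}
defs := routing.DefaultBackendDefs(cfg) // always contains llamacpp; MLX appended on darwin here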

type NormalizedServeMux

type NormalizedServeMux struct {
	*http.ServeMux
}

func NewNormalizedServeMux

func NewNormalizedServeMux() *NormalizedServeMux

func NewRouter added in v1.1.1

func NewRouter(cfg RouterConfig) *NormalizedServeMux

NewRouter builds a NormalizedServeMux with the standard model-runner route structure: models endpoints, scheduler/inference endpoints, path aliases (/v1/, /rerank, /score), Ollama compatibility, and Anthropic compatibility.

func (*NormalizedServeMux) ServeHTTP

func (nm *NormalizedServeMux) ServeHTTP(w http.ResponseWriter, r *http.Request)

type RouterConfig added in v1.1.1

type RouterConfig struct {
	Log           logging.Logger
	Scheduler     *scheduling.Scheduler
	SchedulerHTTP *scheduling.HTTPHandler
	ModelHandler  *models.HTTPHandler
	ModelManager  *models.Manager

	// AllowedOrigins is forwarded to the Ollama and Anthropic handlers
	// for CORS support. It may be nil.
	AllowedOrigins []string

	// ModelHandlerMiddleware optionally wraps the model handler before
	// registration (e.g. pinata uses this for access restrictions).
	// If nil the model handler is registered directly.
	ModelHandlerMiddleware func(http.Handler) http.Handler

	// IncludeResponsesAPI enables the OpenAI Responses API compatibility
	// layer, registering it under /responses, /v1/responses, and
	// /engines/responses prefixes. Requires SchedulerHTTP to be set.
	IncludeResponsesAPI bool

	// ExtraRoutes is called after standard routes are registered,
	// allowing callers to add custom routes (root handler, metrics, etc.).
	// It may be nil.
	ExtraRoutes func(*NormalizedServeMux)
}

RouterConfig holds the dependencies needed to build the standard model-runner HTTP route structure.
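
A sketch of building and serving a router, assuming the scheduler, handlers, and manager have already been constructed; the origin and port are hypothetical choices. Because NormalizedServeMux embeds *http.ServeMux, the standard registration methods are available inside ExtraRoutes:

router := routing.NewRouter(routing.RouterConfig{
	Log:            log,
	Scheduler:      sched,
	SchedulerHTTP:  schedHTTP,
	ModelHandler:   modelHandler,
	ModelManager:   modelManager,
	AllowedOrigins: []string{"http://localhost:3000"}, // hypothetical origin
	ExtraRoutes: func(mux *routing.NormalizedServeMux) {
		// Custom routes are added after the standard ones are registered.
		mux.HandleFunc("/healthz", func(w http.ResponseWriter, _ *http.Request) {
			w.WriteHeader(http.StatusOK)
		})
	},
})
if err := http.ListenAndServe(":12434", router); err != nil { // hypothetical port
	// handle server error
}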

type Service added in v1.1.1

type Service struct {
	ModelManager  *models.Manager
	ModelHandler  *models.HTTPHandler
	Scheduler     *scheduling.Scheduler
	SchedulerHTTP *scheduling.HTTPHandler
	Router        *NormalizedServeMux
	Backends      map[string]inference.Backend
}

Service is the assembled inference service stack.

func NewService added in v1.1.1

func NewService(cfg ServiceConfig) (*Service, error)

NewService wires up the full inference service stack from the given configuration and returns the assembled Service.

type ServiceConfig added in v1.1.1

type ServiceConfig struct {
	Log          logging.Logger
	ClientConfig models.ClientConfig

	// Backends lists the backends to initialize. Each Init function
	// is called with the model manager during NewService.
	Backends []BackendDef

	// OnBackendError is called when a backend Init returns an error.
	// If nil, a warning is logged and the backend is skipped.
	OnBackendError func(name string, err error)

	// DefaultBackendName is the key used to look up the default backend
	// (typically llamacpp.Name).
	DefaultBackendName string

	// HTTPClient is used by the scheduler for backend downloads and
	// health checks.
	HTTPClient *http.Client

	// MetricsTracker tracks inference metrics.
	MetricsTracker *metrics.Tracker

	// AllowedOrigins is forwarded to model, scheduler, Ollama, and
	// Anthropic handlers for CORS support. It may be nil.
	AllowedOrigins []string

	// ModelHandlerMiddleware optionally wraps the model handler before
	// route registration (e.g. for access restrictions).
	ModelHandlerMiddleware func(http.Handler) http.Handler

	// IncludeResponsesAPI enables the OpenAI Responses API compatibility
	// layer in the router.
	IncludeResponsesAPI bool

	// ExtraRoutes is called after the standard routes are registered.
	// The Service fields (except Router) are fully populated when this
	// is called, so the callback can reference them.
	ExtraRoutes func(*NormalizedServeMux, *Service)
}

ServiceConfig holds the parameters needed to build the full inference service stack: model manager, model handler, scheduler, and router.
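
Putting it together, a sketch of assembling and serving the full stack. Here clientCfg, tracker, and backendsCfg are assumed to exist, and the DefaultBackendName literal stands in for the llamacpp package's exported name constant:

svc, err := routing.NewService(routing.ServiceConfig{
	Log:                log,
	ClientConfig:       clientCfg, // assumed models.ClientConfig
	Backends:           routing.DefaultBackendDefs(backendsCfg),
	DefaultBackendName: "llamacpp", // typically llamacpp.Name
	HTTPClient:         http.DefaultClient,
	MetricsTracker:     tracker, // assumed *metrics.Tracker
})
if err != nil {
	// handle wiring error
}
_ = http.ListenAndServe(":12434", svc.Router) // hypothetical port

Because Service exposes the manager, handlers, scheduler, router, and backend map as fields, callers can reach into any layer after construction.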
