Documentation ¶
Index ¶
- type ActiveRunner
- type AggregatedMetricsHandler
- type ModelData
- type ModelRecordsResponse
- type OpenAIRecorder
- func (r *OpenAIRecorder) GetRecordsHandler() http.HandlerFunc
- func (r *OpenAIRecorder) NewResponseRecorder(w http.ResponseWriter) http.ResponseWriter
- func (r *OpenAIRecorder) RecordRequest(model string, req *http.Request, body []byte) string
- func (r *OpenAIRecorder) RecordResponse(id, model string, rw http.ResponseWriter)
- func (r *OpenAIRecorder) RemoveModel(model string)
- func (r *OpenAIRecorder) SetConfigForModel(model string, config *inference.BackendConfiguration)
- type RequestResponsePair
- type SchedulerInterface
- type SchedulerMetricsHandler
- type StreamingError
- type Tracker
- type TrackerRoundTripper
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type ActiveRunner ¶
ActiveRunner contains information about an active runner.
type AggregatedMetricsHandler ¶
type AggregatedMetricsHandler struct {
// contains filtered or unexported fields
}
AggregatedMetricsHandler collects metrics from all active runners and aggregates them with labels.
func NewAggregatedMetricsHandler ¶
func NewAggregatedMetricsHandler(log logging.Logger, scheduler SchedulerInterface) *AggregatedMetricsHandler
NewAggregatedMetricsHandler creates a new aggregated metrics handler.
func (*AggregatedMetricsHandler) ServeHTTP ¶
func (h *AggregatedMetricsHandler) ServeHTTP(w http.ResponseWriter, r *http.Request)
ServeHTTP implements http.Handler for aggregated metrics.
type ModelData ¶
type ModelData struct {
Config inference.BackendConfiguration `json:"config"`
Records []*RequestResponsePair `json:"records"`
}
type ModelRecordsResponse ¶
type OpenAIRecorder ¶
type OpenAIRecorder struct {
// contains filtered or unexported fields
}
func NewOpenAIRecorder ¶
func NewOpenAIRecorder(log logging.Logger, modelManager *models.Manager) *OpenAIRecorder
func (*OpenAIRecorder) GetRecordsHandler ¶
func (r *OpenAIRecorder) GetRecordsHandler() http.HandlerFunc
func (*OpenAIRecorder) NewResponseRecorder ¶
func (r *OpenAIRecorder) NewResponseRecorder(w http.ResponseWriter) http.ResponseWriter
func (*OpenAIRecorder) RecordRequest ¶
func (r *OpenAIRecorder) RecordRequest(model string, req *http.Request, body []byte) string
func (*OpenAIRecorder) RecordResponse ¶
func (r *OpenAIRecorder) RecordResponse(id, model string, rw http.ResponseWriter)
func (*OpenAIRecorder) RemoveModel ¶
func (r *OpenAIRecorder) RemoveModel(model string)
func (*OpenAIRecorder) SetConfigForModel ¶
func (r *OpenAIRecorder) SetConfigForModel(model string, config *inference.BackendConfiguration)
type RequestResponsePair ¶
type RequestResponsePair struct {
ID string `json:"id"`
Model string `json:"model"`
Method string `json:"method"`
URL string `json:"url"`
Request string `json:"request"`
Response string `json:"response,omitempty"`
Error string `json:"error,omitempty"`
Timestamp int64 `json:"timestamp"`
StatusCode int `json:"status_code"`
UserAgent string `json:"user_agent,omitempty"`
}
type SchedulerInterface ¶
type SchedulerInterface interface {
GetRunningBackends(w http.ResponseWriter, r *http.Request)
GetLlamaCppSocket() (string, error)
GetAllActiveRunners() []ActiveRunner
}
SchedulerInterface defines the methods we need from the scheduler.
type SchedulerMetricsHandler ¶
type SchedulerMetricsHandler struct {
// contains filtered or unexported fields
}
SchedulerMetricsHandler handles metrics requests by finding active llama.cpp runners.
func (*SchedulerMetricsHandler) ServeHTTP ¶
func (h *SchedulerMetricsHandler) ServeHTTP(w http.ResponseWriter, r *http.Request)
ServeHTTP implements http.Handler for metrics proxying via scheduler.
type StreamingError ¶
type StreamingError struct {
StatusCode int `json:"status_code"`
Message string `json:"message"`
Type string `json:"type,omitempty"`
Details string `json:"details,omitempty"`
}
StreamingError represents an error that occurred during streaming response processing. It contains the HTTP status code and additional context about the error.
func (*StreamingError) Error ¶
func (e *StreamingError) Error() string
Error implements the error interface for StreamingError.
func (*StreamingError) GetStatusCode ¶
func (e *StreamingError) GetStatusCode() int
GetStatusCode returns the HTTP status code associated with this streaming error.
type TrackerRoundTripper ¶
type TrackerRoundTripper struct {
Transport http.RoundTripper
}