Documentation
¶
Index ¶
- type EngineStatus
- type P
- func (p *P) AddOrUpdateEngineStatus(taskSender TaskSender, engineStatus *v1.EngineStatus, tenantID string, ...)
- func (p *P) DumpStatus() *Status
- func (p *P) DumpTenantStatus(tenantID string) *TenantStatus
- func (p *P) LastEngineHeartbeats() map[string]time.Time
- func (p *P) LocalEngines() map[string][]*v1.EngineStatus
- func (p *P) MaxInProgressTaskDuration() time.Duration
- func (p *P) NumEnginesByTenantID() map[string]int
- func (p *P) NumInProgressTasks() int
- func (p *P) NumLocalEnginesByTenantID() map[string]int
- func (p *P) NumQueuedTasks() int32
- func (p *P) ProcessTaskResult(taskResult *v1.TaskResult)
- func (p *P) RemoveEngine(engineID string, tenantID string)
- func (p *P) Run(ctx context.Context) error
- func (p *P) SendAndProcessTask(ctx context.Context, origTask *v1.Task, tenantID string, ...) error
- func (p *P) SendAudioTranscriptionTask(ctx context.Context, tenantID string, req *v1.CreateAudioTranscriptionRequest, ...) (*http.Response, *ProcessingStats, error)
- func (p *P) SendChatCompletionTask(ctx context.Context, tenantID string, req *v1.CreateChatCompletionRequest, ...) (*http.Response, *ProcessingStats, error)
- func (p *P) SendEmbeddingTask(ctx context.Context, tenantID string, req *v1.CreateEmbeddingRequest, ...) (*http.Response, *ProcessingStats, error)
- func (p *P) SendGoAwayTaskToLocalEngines(ctx context.Context) error
- func (p *P) SendHeartbeatTaskToEngines(ctx context.Context, timeout time.Duration) error
- func (p *P) SendModelResponseTask(ctx context.Context, tenantID string, req *v1.CreateModelResponseRequest, ...) (*http.Response, *ProcessingStats, error)
- func (p *P) SendTokenizeTask(ctx context.Context, tenantID string, req *v1.TokenizeRequest, ...) (*http.Response, *ProcessingStats, error)
- func (p *P) SetMetricsMonitor(metrisMonitor metricsMonitor)
- type ProcessingStats
- type Status
- type TaskSender
- type TaskStatus
- type TenantStatus
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type EngineStatus ¶
type EngineStatus struct {
Tasks []*TaskStatus `json:"tasks"`
IsLocal bool `json:"isLocal"`
Ready bool `json:"ready"`
Models []*v1.EngineStatus_Model `json:"models"`
ClusterID string `json:"clusterId"`
}
EngineStatus is the status of an engine.
type P ¶
type P struct {
// contains filtered or unexported fields
}
P processes inference tasks.
func (*P) AddOrUpdateEngineStatus ¶
func (p *P) AddOrUpdateEngineStatus( taskSender TaskSender, engineStatus *v1.EngineStatus, tenantID string, isLocal bool, )
AddOrUpdateEngineStatus adds or updates the engine status.
func (*P) DumpStatus ¶
DumpStatus dumps the status of the processor.
func (*P) DumpTenantStatus ¶ added in v1.24.0
func (p *P) DumpTenantStatus(tenantID string) *TenantStatus
DumpTenantStatus dumps the status of a tenant.
func (*P) LastEngineHeartbeats ¶ added in v1.27.0
LastEngineHeartbeats returns the last heartbeat time of each engine.
func (*P) LocalEngines ¶ added in v1.4.0
func (p *P) LocalEngines() map[string][]*v1.EngineStatus
LocalEngines returns the local engine statuses grouped by tenant ID.
func (*P) MaxInProgressTaskDuration ¶
MaxInProgressTaskDuration returns the maximum duration of in-progress tasks.
func (*P) NumEnginesByTenantID ¶
NumEnginesByTenantID returns the number of engines by tenant ID.
func (*P) NumInProgressTasks ¶
NumInProgressTasks returns the number of in-progress tasks.
func (*P) NumLocalEnginesByTenantID ¶ added in v1.27.0
NumLocalEnginesByTenantID returns the number of local engines by tenant ID.
func (*P) NumQueuedTasks ¶
NumQueuedTasks returns the number of queued tasks.
func (*P) ProcessTaskResult ¶
func (p *P) ProcessTaskResult(taskResult *v1.TaskResult)
ProcessTaskResult processes the task result.
func (*P) RemoveEngine ¶
RemoveEngine removes the engine.
func (*P) SendAndProcessTask ¶ added in v1.4.0
func (p *P) SendAndProcessTask( ctx context.Context, origTask *v1.Task, tenantID string, processResult func(*v1.TaskResult) error, ) error
SendAndProcessTask sends a task and processes the results.
func (*P) SendAudioTranscriptionTask ¶ added in v1.31.0
func (p *P) SendAudioTranscriptionTask( ctx context.Context, tenantID string, req *v1.CreateAudioTranscriptionRequest, header http.Header, ) (*http.Response, *ProcessingStats, error)
SendAudioTranscriptionTask sends an audio transcription task.
func (*P) SendChatCompletionTask ¶
func (p *P) SendChatCompletionTask( ctx context.Context, tenantID string, req *v1.CreateChatCompletionRequest, header http.Header, ) (*http.Response, *ProcessingStats, error)
SendChatCompletionTask sends a chat completion task.
func (*P) SendEmbeddingTask ¶
func (p *P) SendEmbeddingTask( ctx context.Context, tenantID string, req *v1.CreateEmbeddingRequest, header http.Header, ) (*http.Response, *ProcessingStats, error)
SendEmbeddingTask sends an embedding task.
func (*P) SendGoAwayTaskToLocalEngines ¶ added in v1.25.0
SendGoAwayTaskToLocalEngines sends a go away task to local engines.
func (*P) SendHeartbeatTaskToEngines ¶ added in v1.27.0
SendHeartbeatTaskToEngines sends a heartbeat task to engines.
func (*P) SendModelResponseTask ¶ added in v1.36.0
func (p *P) SendModelResponseTask( ctx context.Context, tenantID string, req *v1.CreateModelResponseRequest, header http.Header, ) (*http.Response, *ProcessingStats, error)
SendModelResponseTask sends a model response task.
func (*P) SendTokenizeTask ¶ added in v1.42.0
func (p *P) SendTokenizeTask( ctx context.Context, tenantID string, req *v1.TokenizeRequest, header http.Header, ) (*http.Response, *ProcessingStats, error)
SendTokenizeTask sends a tokenize task.
func (*P) SetMetricsMonitor ¶ added in v1.30.0
func (p *P) SetMetricsMonitor(metrisMonitor metricsMonitor)
SetMetricsMonitor sets the metrics monitor for the processor.
type ProcessingStats ¶ added in v1.38.0
type ProcessingStats struct {
// contains filtered or unexported fields
}
ProcessingStats holds stats about processing.
func (*ProcessingStats) RuntimeLatencyMs ¶ added in v1.38.0
func (s *ProcessingStats) RuntimeLatencyMs() int32
RuntimeLatencyMs returns the runtime latency in milliseconds.
func (*ProcessingStats) RuntimeTimeToFirstTokenMs ¶ added in v1.38.0
func (s *ProcessingStats) RuntimeTimeToFirstTokenMs() int32
RuntimeTimeToFirstTokenMs returns the time to first token in milliseconds.
type Status ¶
type Status struct {
Tenants map[string]*TenantStatus `json:"tenants"`
}
Status is the status of the processor.
type TaskSender ¶ added in v1.4.0
type TaskSender interface {
Send(*v1.ProcessTasksResponse) error
}
TaskSender sends a new task to the engine.
type TaskStatus ¶
TaskStatus is the status of a task.
type TenantStatus ¶
type TenantStatus struct {
Engines map[string]*EngineStatus `json:"engines"`
}
TenantStatus is the status of a tenant.