infprocessor

package
v1.46.0 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 27, 2026 | License: Apache-2.0 | Imports: 13 | Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type EngineStatus

type EngineStatus struct {
	Tasks   []*TaskStatus `json:"tasks"`
	IsLocal bool          `json:"isLocal"`
	Ready   bool          `json:"ready"`

	Models    []*v1.EngineStatus_Model `json:"models"`
	ClusterID string                   `json:"clusterId"`
}

EngineStatus is the status of an engine.

type P

type P struct {
	// contains filtered or unexported fields
}

P processes inference tasks.

func NewP

func NewP(engineRouter engineRouter, logger logr.Logger) *P

NewP creates a new processor.

func (*P) AddOrUpdateEngineStatus

func (p *P) AddOrUpdateEngineStatus(
	taskSender TaskSender,
	engineStatus *v1.EngineStatus,
	tenantID string,
	isLocal bool,
)

AddOrUpdateEngineStatus adds or updates the engine status.

func (*P) DumpStatus

func (p *P) DumpStatus() *Status

DumpStatus dumps the status of the processor.

func (*P) DumpTenantStatus added in v1.24.0

func (p *P) DumpTenantStatus(tenantID string) *TenantStatus

DumpTenantStatus dumps the status of a tenant.

func (*P) LastEngineHeartbeats added in v1.27.0

func (p *P) LastEngineHeartbeats() map[string]time.Time

LastEngineHeartbeats returns the last heartbeat time of each engine.

func (*P) LocalEngines added in v1.4.0

func (p *P) LocalEngines() map[string][]*v1.EngineStatus

LocalEngines returns the local engine statuses grouped by tenant ID.

func (*P) MaxInProgressTaskDuration

func (p *P) MaxInProgressTaskDuration() time.Duration

MaxInProgressTaskDuration returns the maximum duration of in-progress tasks.

func (*P) NumEnginesByTenantID

func (p *P) NumEnginesByTenantID() map[string]int

NumEnginesByTenantID returns the number of engines by tenant ID.

func (*P) NumInProgressTasks

func (p *P) NumInProgressTasks() int

NumInProgressTasks returns the number of in-progress tasks.

func (*P) NumLocalEnginesByTenantID added in v1.27.0

func (p *P) NumLocalEnginesByTenantID() map[string]int

NumLocalEnginesByTenantID returns the number of local engines by tenant ID.

func (*P) NumQueuedTasks

func (p *P) NumQueuedTasks() int32

NumQueuedTasks returns the number of queued tasks.

func (*P) ProcessTaskResult

func (p *P) ProcessTaskResult(taskResult *v1.TaskResult)

ProcessTaskResult processes the task result.

func (*P) RemoveEngine

func (p *P) RemoveEngine(engineID string, tenantID string)

RemoveEngine removes the engine.

func (*P) Run

func (p *P) Run(ctx context.Context) error

Run runs the processor.

func (*P) SendAndProcessTask added in v1.4.0

func (p *P) SendAndProcessTask(
	ctx context.Context,
	origTask *v1.Task,
	tenantID string,
	processResult func(*v1.TaskResult) error,
) error

SendAndProcessTask sends a task and processes the results.

func (*P) SendAudioTranscriptionTask added in v1.31.0

func (p *P) SendAudioTranscriptionTask(
	ctx context.Context,
	tenantID string,
	req *v1.CreateAudioTranscriptionRequest,
	header http.Header,
) (*http.Response, *ProcessingStats, error)

SendAudioTranscriptionTask sends an audio transcription task.

func (*P) SendChatCompletionTask

func (p *P) SendChatCompletionTask(
	ctx context.Context,
	tenantID string,
	req *v1.CreateChatCompletionRequest,
	header http.Header,
) (*http.Response, *ProcessingStats, error)

SendChatCompletionTask sends a chat completion task.

func (*P) SendEmbeddingTask

func (p *P) SendEmbeddingTask(
	ctx context.Context,
	tenantID string,
	req *v1.CreateEmbeddingRequest,
	header http.Header,
) (*http.Response, *ProcessingStats, error)

SendEmbeddingTask sends an embedding task.

func (*P) SendGoAwayTaskToLocalEngines added in v1.25.0

func (p *P) SendGoAwayTaskToLocalEngines(ctx context.Context) error

SendGoAwayTaskToLocalEngines sends a go away task to local engines.

func (*P) SendHeartbeatTaskToEngines added in v1.27.0

func (p *P) SendHeartbeatTaskToEngines(ctx context.Context, timeout time.Duration) error

SendHeartbeatTaskToEngines sends a heartbeat task to engines.

func (*P) SendModelResponseTask added in v1.36.0

func (p *P) SendModelResponseTask(
	ctx context.Context,
	tenantID string,
	req *v1.CreateModelResponseRequest,
	header http.Header,
) (*http.Response, *ProcessingStats, error)

SendModelResponseTask sends a model response task.

func (*P) SendTokenizeTask added in v1.42.0

func (p *P) SendTokenizeTask(
	ctx context.Context,
	tenantID string,
	req *v1.TokenizeRequest,
	header http.Header,
) (*http.Response, *ProcessingStats, error)

SendTokenizeTask sends a tokenize task.

func (*P) SetMetricsMonitor added in v1.30.0

func (p *P) SetMetricsMonitor(metrisMonitor metricsMonitor)

SetMetricsMonitor sets the metrics monitor for the processor.

type ProcessingStats added in v1.38.0

type ProcessingStats struct {
	// contains filtered or unexported fields
}

ProcessingStats holds stats about processing.

func (*ProcessingStats) RuntimeLatencyMs added in v1.38.0

func (s *ProcessingStats) RuntimeLatencyMs() int32

RuntimeLatencyMs returns the runtime latency in milliseconds.

func (*ProcessingStats) RuntimeTimeToFirstTokenMs added in v1.38.0

func (s *ProcessingStats) RuntimeTimeToFirstTokenMs() int32

RuntimeTimeToFirstTokenMs returns the time to first token in milliseconds.

type Status

type Status struct {
	Tenants map[string]*TenantStatus `json:"tenants"`
}

Status is the status of the processor.

type TaskSender added in v1.4.0

type TaskSender interface {
	Send(*v1.ProcessTasksResponse) error
}

TaskSender sends a new task to the engine.

type TaskStatus

type TaskStatus struct {
	ID      string `json:"id"`
	ModelID string `json:"modelId"`
}

TaskStatus is the status of a task.

type TenantStatus

type TenantStatus struct {
	Engines map[string]*EngineStatus `json:"engines"`
}

TenantStatus is the status of a tenant.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL