server

package
v1.27.0 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: May 9, 2025 License: Apache-2.0 Imports: 36 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func CreateChatCompletionRequestFixture added in v1.15.0

func CreateChatCompletionRequestFixture(mutators ...func(c *v1.CreateChatCompletionRequest)) *v1.CreateChatCompletionRequest

CreateChatCompletionRequestFixture creates a test fixture representing a CreateChatCompletionRequest with default parameters. The function accepts optional mutators to modify the generated request before returning it.

func WithModel added in v1.15.0

func WithModel(modelID string) func(c *v1.CreateChatCompletionRequest)

WithModel is a mutator for the CreateChatCompletionRequestFixture that sets the model ID.

func WithRAG added in v1.15.0

func WithRAG(c *v1.CreateChatCompletionRequest)

WithRAG is a mutator for the CreateChatCompletionRequestFixture that sets a default RAG config for testing.

Types

type IMS added in v1.21.0

type IMS struct {
	v1.UnimplementedInferenceServiceServer
	// contains filtered or unexported fields
}

IMS is a server for inference management services.

func NewInferenceManagementServer added in v1.21.0

func NewInferenceManagementServer(
	infProcessor *infprocessor.P,
	modelClient ModelClient,
	logger logr.Logger,
) *IMS

NewInferenceManagementServer creates a new inference management server.

func (*IMS) ActivateModel added in v1.21.0

func (s *IMS) ActivateModel(ctx context.Context, req *v1.ActivateModelRequest) (*v1.ActivateModelResponse, error)

ActivateModel activates a model.

func (*IMS) DeactivateModel added in v1.21.0

func (s *IMS) DeactivateModel(ctx context.Context, req *v1.DeactivateModelRequest) (*v1.DeactivateModelResponse, error)

DeactivateModel deactivates a model.

func (*IMS) GetInferenceStatus added in v1.21.0

func (s *IMS) GetInferenceStatus(ctx context.Context, req *v1.GetInferenceStatusRequest) (*v1.InferenceStatus, error)

GetInferenceStatus returns the inference status.

func (*IMS) Run added in v1.21.0

func (s *IMS) Run(ctx context.Context, authConfig config.AuthConfig, port int) error

Run runs the inference management server.

func (*IMS) RunWithListener added in v1.21.0

func (s *IMS) RunWithListener(ctx context.Context, authConfig config.AuthConfig, l net.Listener) error

RunWithListener runs the server with a given listener.

func (*IMS) Stop added in v1.21.0

func (s *IMS) Stop()

Stop stops the inference management server.

type IS added in v1.4.0

type IS struct {
	v1.UnimplementedInferenceInternalServiceServer
	// contains filtered or unexported fields
}

IS is a server for internal services.

func NewInternalServer added in v1.4.0

func NewInternalServer(
	infProcessor *infprocessor.P,
	taskExchanger *taskexchanger.E,
	logger logr.Logger,
) *IS

NewInternalServer creates a new internal server.

func (*IS) ProcessTasksInternal added in v1.4.0

func (is *IS) ProcessTasksInternal(srv v1.InferenceInternalService_ProcessTasksInternalServer) error

ProcessTasksInternal processes tasks.

func (*IS) Run added in v1.4.0

func (is *IS) Run(ctx context.Context, port int) error

Run runs the internal service server.

func (*IS) RunWithListener added in v1.4.0

func (is *IS) RunWithListener(ctx context.Context, l net.Listener) error

RunWithListener runs the server with a given listener.

func (*IS) Stop added in v1.4.0

func (is *IS) Stop()

Stop stops the internal service server.

type ModelClient

type ModelClient interface {
	GetModel(ctx context.Context, in *mv1.GetModelRequest, opts ...grpc.CallOption) (*mv1.Model, error)
}

ModelClient is an interface for a model client.

type NoopModelClient

type NoopModelClient struct {
}

NoopModelClient is a no-op model client.

func (*NoopModelClient) GetModel

func (c *NoopModelClient) GetModel(ctx context.Context, in *mv1.GetModelRequest, opts ...grpc.CallOption) (*mv1.Model, error)

GetModel is a no-op implementation of GetModel.

type NoopRewriter

type NoopRewriter struct {
}

NoopRewriter is a no-op rewriter.

func (*NoopRewriter) ProcessMessages

ProcessMessages is a no-op implementation of ProcessMessages.

type NoopVectorStoreClient

type NoopVectorStoreClient struct {
}

NoopVectorStoreClient is a no-op vector store client.

func (*NoopVectorStoreClient) GetVectorStoreByName

GetVectorStoreByName is a no-op implementation of GetVectorStoreByName.

type Rewriter

type Rewriter interface {
	ProcessMessages(
		ctx context.Context,
		vstore *vsv1.VectorStore,
		messages []*v1.CreateChatCompletionRequest_Message,
	) ([]*v1.CreateChatCompletionRequest_Message, error)
}

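Rewriter is an interface for RAG (retrieval-augmented generation): given a vector store, it processes the chat completion request messages and returns the resulting messages.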

type S

type S struct {
	v1.UnimplementedChatServiceServer
	// contains filtered or unexported fields
}

S is a server that handles chat completion, (legacy) completion, and embedding requests.

func New

func New(
	m metricsMonitoring,
	usage sender.UsageSetter,
	rate rate.Limiter,
	modelClient ModelClient,
	vsClient VectorStoreClient,
	r Rewriter,
	taskSender taskSender,
	logger logr.Logger,
) *S

New creates a server.

func (*S) CreateChatCompletion

func (s *S) CreateChatCompletion(
	w http.ResponseWriter,
	req *http.Request,
	pathParams map[string]string,
)

CreateChatCompletion creates a chat completion.

func (*S) CreateCompletion

func (s *S) CreateCompletion(
	w http.ResponseWriter,
	req *http.Request,
	pathParams map[string]string,
)

CreateCompletion creates a (legacy) completion.

The implementation is similar to CreateChatCompletion, but this has extra logic for converting a legacy request to a non-legacy request (and vice versa for response).

TODO(kenji): Avoid code duplication with CreateChatCompletion.

func (*S) CreateEmbedding

func (s *S) CreateEmbedding(
	w http.ResponseWriter,
	req *http.Request,
	pathParams map[string]string,
)

CreateEmbedding creates an embedding.

func (*S) GracefulStop added in v1.25.0

func (s *S) GracefulStop()

GracefulStop gracefully stops the gRPC server.

func (*S) Run

func (s *S) Run(ctx context.Context, port int, authConfig config.AuthConfig) error

Run starts the gRPC server.

func (*S) Stop

func (s *S) Stop()

Stop stops the gRPC server.

type VectorStoreClient

type VectorStoreClient interface {
	GetVectorStoreByName(ctx context.Context, req *vsv1.GetVectorStoreByNameRequest, opts ...grpc.CallOption) (*vsv1.VectorStore, error)
}

VectorStoreClient is an interface for a vector store client.

type WS

type WS struct {
	v1.UnimplementedInferenceWorkerServiceServer
	// contains filtered or unexported fields
}

WS is a server for worker services.

func NewWorkerServiceServer

func NewWorkerServiceServer(infProcessor *infprocessor.P, logger logr.Logger) *WS

NewWorkerServiceServer creates a new worker service server.

func (*WS) GracefulStop added in v1.25.0

func (ws *WS) GracefulStop()

GracefulStop gracefully stops the worker service server.

func (*WS) ProcessTasks

func (ws *WS) ProcessTasks(srv v1.InferenceWorkerService_ProcessTasksServer) error

ProcessTasks processes tasks.

func (*WS) Run

func (ws *WS) Run(ctx context.Context, port int, authConfig config.AuthConfig, tlsConfig *config.TLS) error

Run runs the worker service server.

func (*WS) RunWithListener added in v1.4.0

func (ws *WS) RunWithListener(ctx context.Context, authConfig config.AuthConfig, tlsConfig *config.TLS, l net.Listener) error

RunWithListener runs the server with a given listener.

func (*WS) Stop

func (ws *WS) Stop()

Stop stops the worker service server.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL