Documentation
¶
Index ¶
- func CreateChatCompletionRequestFixture(mutators ...func(c *v1.CreateChatCompletionRequest)) *v1.CreateChatCompletionRequest
- func WithModel(modelID string) func(c *v1.CreateChatCompletionRequest)
- func WithRAG(c *v1.CreateChatCompletionRequest)
- type IMS
- func (s *IMS) GetInferenceStatus(ctx context.Context, req *v1.GetInferenceStatusRequest) (*v1.InferenceStatus, error)
- func (s *IMS) Run(ctx context.Context, authConfig config.AuthConfig, port int) error
- func (s *IMS) RunWithListener(ctx context.Context, authConfig config.AuthConfig, l net.Listener) error
- func (s *IMS) Stop()
- type IS
- type ModelClient
- type NoopModelClient
- type NoopRewriter
- type NoopVectorStoreClient
- type Rewriter
- type S
- func (s *S) CreateAudioTranscription(w http.ResponseWriter, req *http.Request, pathParams map[string]string)
- func (s *S) CreateChatCompletion(w http.ResponseWriter, req *http.Request, pathParams map[string]string)
- func (s *S) CreateCompletion(w http.ResponseWriter, req *http.Request, pathParams map[string]string)
- func (s *S) CreateEmbedding(w http.ResponseWriter, req *http.Request, pathParams map[string]string)
- func (s *S) CreateModelResponse(w http.ResponseWriter, req *http.Request, pathParams map[string]string)
- func (s *S) GracefulStop()
- func (s *S) Run(ctx context.Context, port int, authConfig config.AuthConfig) error
- func (s *S) Stop()
- func (s *S) Tokenize(w http.ResponseWriter, req *http.Request, pathParams map[string]string)
- type VectorStoreClient
- type WS
- func (ws *WS) GracefulStop()
- func (ws *WS) ProcessTasks(srv v1.InferenceWorkerService_ProcessTasksServer) error
- func (ws *WS) Run(ctx context.Context, port int, authConfig config.AuthConfig, ...) error
- func (ws *WS) RunWithListener(ctx context.Context, authConfig config.AuthConfig, tlsConfig *config.TLS, ...) error
- func (ws *WS) Stop()
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func CreateChatCompletionRequestFixture ¶ added in v1.15.0
func CreateChatCompletionRequestFixture(mutators ...func(c *v1.CreateChatCompletionRequest)) *v1.CreateChatCompletionRequest
CreateChatCompletionRequestFixture creates a test fixture representing a CreateChatCompletionRequest with default parameters. The function accepts optional mutators to modify the generated request before returning it.
func WithModel ¶ added in v1.15.0
func WithModel(modelID string) func(c *v1.CreateChatCompletionRequest)
WithModel is a mutator for the CreateChatCompletionRequestFixture that sets the model ID.
func WithRAG ¶ added in v1.15.0
func WithRAG(c *v1.CreateChatCompletionRequest)
WithRAG is a mutator for the CreateChatCompletionRequestFixture that sets a default RAG config for testing.
Types ¶
type IMS ¶ added in v1.21.0
type IMS struct {
v1.UnimplementedInferenceServiceServer
// contains filtered or unexported fields
}
IMS is a server for inference management services.
func NewInferenceManagementServer ¶ added in v1.21.0
func NewInferenceManagementServer( infProcessor *infprocessor.P, logger logr.Logger, ) *IMS
NewInferenceManagementServer creates a new inference management server.
func (*IMS) GetInferenceStatus ¶ added in v1.21.0
func (s *IMS) GetInferenceStatus(ctx context.Context, req *v1.GetInferenceStatusRequest) (*v1.InferenceStatus, error)
GetInferenceStatus returns the inference status.
func (*IMS) RunWithListener ¶ added in v1.21.0
func (s *IMS) RunWithListener(ctx context.Context, authConfig config.AuthConfig, l net.Listener) error
RunWithListener runs the server with a given listener.
type IS ¶ added in v1.4.0
type IS struct {
v1.UnimplementedInferenceInternalServiceServer
// contains filtered or unexported fields
}
IS is a server for internal services.
func NewInternalServer ¶ added in v1.4.0
func NewInternalServer( infProcessor *infprocessor.P, taskExchanger *taskexchanger.E, logger logr.Logger, ) *IS
NewInternalServer creates a new internal server.
func (*IS) ProcessTasksInternal ¶ added in v1.4.0
func (is *IS) ProcessTasksInternal(srv v1.InferenceInternalService_ProcessTasksInternalServer) error
ProcessTasksInternal processes tasks.
func (*IS) RunWithListener ¶ added in v1.4.0
RunWithListener runs the server with a given listener.
type ModelClient ¶
type ModelClient interface {
GetModel(ctx context.Context, in *mv1.GetModelRequest, opts ...grpc.CallOption) (*mv1.Model, error)
ActivateModel(ctx context.Context, in *mv1.ActivateModelRequest, opts ...grpc.CallOption) (*mv1.ActivateModelResponse, error)
}
ModelClient is an interface for a model client.
type NoopModelClient ¶
type NoopModelClient struct {
}
NoopModelClient is a no-op model client.
func (*NoopModelClient) ActivateModel ¶ added in v1.27.3
func (c *NoopModelClient) ActivateModel(ctx context.Context, in *mv1.ActivateModelRequest, opts ...grpc.CallOption) (*mv1.ActivateModelResponse, error)
ActivateModel is a no-op implementation of ActivateModel.
func (*NoopModelClient) GetModel ¶
func (c *NoopModelClient) GetModel(ctx context.Context, in *mv1.GetModelRequest, opts ...grpc.CallOption) (*mv1.Model, error)
GetModel is a no-op implementation of GetModel.
type NoopRewriter ¶
type NoopRewriter struct {
}
NoopRewriter is a no-op rewriter.
func (*NoopRewriter) ProcessMessages ¶
func (r *NoopRewriter) ProcessMessages( ctx context.Context, vstore *vsv1.VectorStore, messages []*v1.ChatCompletionMessage, ) ([]*v1.ChatCompletionMessage, error)
ProcessMessages is a no-op implementation of ProcessMessages.
type NoopVectorStoreClient ¶
type NoopVectorStoreClient struct {
}
NoopVectorStoreClient is a no-op vector store client.
func (*NoopVectorStoreClient) GetVectorStoreByName ¶
func (c *NoopVectorStoreClient) GetVectorStoreByName(ctx context.Context, req *vsv1.GetVectorStoreByNameRequest, opts ...grpc.CallOption) (*vsv1.VectorStore, error)
GetVectorStoreByName is a no-op implementation of GetVectorStoreByName.
type Rewriter ¶
type Rewriter interface {
ProcessMessages(
ctx context.Context,
vstore *vsv1.VectorStore,
messages []*v1.ChatCompletionMessage,
) ([]*v1.ChatCompletionMessage, error)
}
Rewriter is an interface for RAG message rewriting.
type S ¶
type S struct {
v1.UnimplementedChatServiceServer
// contains filtered or unexported fields
}
S is a server.
func New ¶
func New( m metricsMonitoring, usage sender.UsageSetter, rate rate.Limiter, modelClient ModelClient, vsClient VectorStoreClient, r Rewriter, taskSender taskSender, nims []string, logger logr.Logger, ) *S
New creates a server.
func (*S) CreateAudioTranscription ¶ added in v1.31.0
func (s *S) CreateAudioTranscription( w http.ResponseWriter, req *http.Request, pathParams map[string]string, )
CreateAudioTranscription creates a new audio transcription.
TODO(kenji): Support all parameters defined in https://platform.openai.com/docs/api-reference/audio/createTranscription
func (*S) CreateChatCompletion ¶
func (s *S) CreateChatCompletion( w http.ResponseWriter, req *http.Request, pathParams map[string]string, )
CreateChatCompletion creates a chat completion.
func (*S) CreateCompletion ¶
func (s *S) CreateCompletion( w http.ResponseWriter, req *http.Request, pathParams map[string]string, )
CreateCompletion creates a (legacy) completion.
The implementation is similar to CreateChatCompletion, but this has extra logic for converting a legacy request to a non-legacy request (and vice versa for response).
TODO(kenji): Avoid code duplication with CreateChatCompletion.
func (*S) CreateEmbedding ¶
func (s *S) CreateEmbedding( w http.ResponseWriter, req *http.Request, pathParams map[string]string, )
CreateEmbedding creates an embedding.
func (*S) CreateModelResponse ¶ added in v1.36.0
func (s *S) CreateModelResponse( w http.ResponseWriter, req *http.Request, pathParams map[string]string, )
CreateModelResponse creates a model response.
func (*S) GracefulStop ¶ added in v1.25.0
func (s *S) GracefulStop()
GracefulStop gracefully stops the gRPC server.
type VectorStoreClient ¶
type VectorStoreClient interface {
GetVectorStoreByName(ctx context.Context, req *vsv1.GetVectorStoreByNameRequest, opts ...grpc.CallOption) (*vsv1.VectorStore, error)
}
VectorStoreClient is an interface for a vector store client.
type WS ¶
type WS struct {
v1.UnimplementedInferenceWorkerServiceServer
// contains filtered or unexported fields
}
WS is a server for worker services.
func NewWorkerServiceServer ¶
func NewWorkerServiceServer(infProcessor *infprocessor.P, logger logr.Logger) *WS
NewWorkerServiceServer creates a new worker service server.
func (*WS) GracefulStop ¶ added in v1.25.0
func (ws *WS) GracefulStop()
GracefulStop gracefully stops the worker service server.
func (*WS) ProcessTasks ¶
func (ws *WS) ProcessTasks(srv v1.InferenceWorkerService_ProcessTasksServer) error
ProcessTasks processes tasks.
func (*WS) Run ¶
func (ws *WS) Run(ctx context.Context, port int, authConfig config.AuthConfig, tlsConfig *config.TLS) error
Run runs the worker service server.