Documentation
¶
Index ¶
- Constants
- Variables
- type PullerServer
- func (s *PullerServer) LoadModel(ctx context.Context, req *mmesh.LoadModelRequest) (*mmesh.LoadModelResponse, error)
- func (s *PullerServer) ModelSize(ctx context.Context, req *mmesh.ModelSizeRequest) (*mmesh.ModelSizeResponse, error)
- func (s *PullerServer) PredictModelSize(ctx context.Context, req *mmesh.PredictModelSizeRequest) (*mmesh.PredictModelSizeResponse, error)
- func (s *PullerServer) RuntimeStatus(ctx context.Context, req *mmesh.RuntimeStatusRequest) (*mmesh.RuntimeStatusResponse, error)
- func (s *PullerServer) StartServer() error
- func (s *PullerServer) UnloadModel(ctx context.Context, req *mmesh.UnloadModelRequest) (*mmesh.UnloadModelResponse, error)
- type PullerServerConfiguration
Constants ¶
const (
StateManagerChannelLength = 25
)
Variables ¶
var PurgeExcludePrefixes = []string{"_"}
Functions ¶
This section is empty.
Types ¶
type PullerServer ¶
type PullerServer struct {
Log logr.Logger
// embed generated Unimplemented type for forward-compatibility for gRPC
mmesh.UnimplementedModelRuntimeServer
// contains filtered or unexported fields
}
PullerServer represents the gRPC server and its configuration.
func NewPullerServer ¶
func NewPullerServer(log logr.Logger) *PullerServer
NewPullerServer creates a new PullerServer instance and initializes it with configuration from the environment
func NewPullerServerFromConfig ¶
func NewPullerServerFromConfig(log logr.Logger, config *PullerServerConfiguration) *PullerServer
NewPullerServerFromConfig creates a new PullerServer instance with the given configuration
func (*PullerServer) LoadModel ¶
func (s *PullerServer) LoadModel(ctx context.Context, req *mmesh.LoadModelRequest) (*mmesh.LoadModelResponse, error)
LoadModel loads a model and returns when the model is fully loaded. See model-runtime.proto loadModel().
func (*PullerServer) ModelSize ¶
func (s *PullerServer) ModelSize(ctx context.Context, req *mmesh.ModelSizeRequest) (*mmesh.ModelSizeResponse, error)
ModelSize calculates the size (memory consumption) of a currently-loaded model. See model-runtime.proto modelSize(). This is a direct passthrough to the model runtime gRPC service.
func (*PullerServer) PredictModelSize ¶
func (s *PullerServer) PredictModelSize(ctx context.Context, req *mmesh.PredictModelSizeRequest) (*mmesh.PredictModelSizeResponse, error)
PredictModelSize predicts the size of a not-yet-loaded model - must return almost immediately. See model-runtime.proto predictModelSize(). This is a direct passthrough to the model runtime gRPC service.
func (*PullerServer) RuntimeStatus ¶
func (s *PullerServer) RuntimeStatus(ctx context.Context, req *mmesh.RuntimeStatusRequest) (*mmesh.RuntimeStatusResponse, error)
RuntimeStatus provides basic runtime status and parameters; called only during startup. This is a direct passthrough to the model runtime gRPC service. See model-runtime.proto runtimeStatus().
func (*PullerServer) StartServer ¶
func (s *PullerServer) StartServer() error
StartServer runs the gRPC server. This func will not return unless the server fails.
func (*PullerServer) UnloadModel ¶
func (s *PullerServer) UnloadModel(ctx context.Context, req *mmesh.UnloadModelRequest) (*mmesh.UnloadModelResponse, error)
UnloadModel unloads a previously loaded (or failed) model and returns when the model is fully unloaded, or immediately if not found/loaded. See model-runtime.proto unloadModel().
type PullerServerConfiguration ¶
type PullerServerConfiguration struct {
Port int // Port to run this puller grpc server
ModelServerEndpoint string // model server endpoint
}
PullerServerConfiguration stores configuration variables for the puller server
func GetPullerServerConfigFromEnv ¶
func GetPullerServerConfigFromEnv(log logr.Logger) *PullerServerConfiguration
GetPullerServerConfigFromEnv creates a new PullerServerConfiguration populated from environment variables.