Documentation ¶
Index ¶
- Constants
- Variables
- func DockerClientForContext(cli *command.DockerCli, name string) (*clientpkg.Client, error)
- type BackendStatus
- type Client
- func (c *Client) Chat(backend, model, prompt, apiKey string, outputFunc func(string), ...) error
- func (c *Client) ConfigureBackend(request scheduling.ConfigureRequest) error
- func (c *Client) DF() (DiskUsage, error)
- func (c *Client) Inspect(model string, remote bool) (dmrm.Model, error)
- func (c *Client) InspectOpenAI(model string) (dmrm.OpenAIModel, error)
- func (c *Client) List() ([]dmrm.Model, error)
- func (c *Client) ListOpenAI(backend, apiKey string) (dmrm.OpenAIModelList, error)
- func (c *Client) LoadModel(ctx context.Context, r io.Reader) error
- func (c *Client) PS() ([]BackendStatus, error)
- func (c *Client) Pull(model string, ignoreRuntimeMemoryCheck bool, progress func(string)) (string, bool, error)
- func (c *Client) Push(model string, progress func(string)) (string, bool, error)
- func (c *Client) Remove(models []string, force bool) (string, error)
- func (c *Client) Requests(modelFilter string, streaming bool, includeExisting bool) (io.ReadCloser, func(), error)
- func (c *Client) Status() Status
- func (c *Client) Tag(source, targetRepo, targetTag string) error
- func (c *Client) Unload(req UnloadRequest) (UnloadResponse, error)
- type DiskUsage
- type DockerHttpClient
- type Layer
- type ModelRunnerContext
- type OpenAIChatMessage
- type OpenAIChatRequest
- type OpenAIChatResponse
- type ProgressMessage
- type Status
- type UnloadRequest
- type UnloadResponse
Constants ¶
const DefaultBackend = "llama.cpp"
Variables ¶
var (
	ErrNotFound = errors.New("model not found")
)
var Version = "dev"
Functions ¶
func DockerClientForContext ¶
func DockerClientForContext(cli *command.DockerCli, name string) (*clientpkg.Client, error)
Types ¶
type BackendStatus ¶ added in v0.1.25
type BackendStatus struct {
	// BackendName is the name of the backend
	BackendName string `json:"backend_name"`
	// ModelName is the name of the model loaded in the backend
	ModelName string `json:"model_name"`
	// Mode is the mode the backend is operating in
	Mode string `json:"mode"`
	// LastUsed represents when this backend was last used (if it's idle)
	LastUsed time.Time `json:"last_used,omitempty"`
}
BackendStatus is to be imported from docker/model-runner once https://github.com/docker/model-runner/pull/42 is merged.
type Client ¶
type Client struct {
	// contains filtered or unexported fields
}
func New ¶
func New(modelRunner *ModelRunnerContext) *Client
func (*Client) Chat ¶
func (c *Client) Chat(backend, model, prompt, apiKey string, outputFunc func(string), shouldUseMarkdown bool) error
Chat performs a chat request and streams the response content with selective markdown rendering.
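For illustration, a minimal sketch of a streaming chat call, written as if inside this package (uses the standard fmt package); the model reference and prompt are placeholders, and the empty API key assumes a local backend that needs none.
// chatOnce streams a single prompt to the default backend and prints each
// response fragment as it arrives.
func chatOnce(c *Client) error {
	return c.Chat(
		DefaultBackend,     // "llama.cpp"
		"ai/example-model", // hypothetical model reference
		"Write a haiku about containers.",
		"", // no API key for the local backend (assumption)
		func(chunk string) { fmt.Print(chunk) },
		false, // plain output, no markdown rendering
	)
}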
func (*Client) ConfigureBackend ¶ added in v0.1.27
func (c *Client) ConfigureBackend(request scheduling.ConfigureRequest) error
func (*Client) InspectOpenAI ¶
func (c *Client) InspectOpenAI(model string) (dmrm.OpenAIModel, error)
func (*Client) ListOpenAI ¶
func (c *Client) ListOpenAI(backend, apiKey string) (dmrm.OpenAIModelList, error)
func (*Client) PS ¶ added in v0.1.25
func (c *Client) PS() ([]BackendStatus, error)
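As a sketch built only from the signatures above (written as if inside this package, uses fmt), listing the active backends might look like:
// printBackends prints one line per backend reported by PS.
func printBackends(c *Client) error {
	statuses, err := c.PS()
	if err != nil {
		return err
	}
	for _, s := range statuses {
		fmt.Printf("%-12s %-30s %-8s last used: %s\n",
			s.BackendName, s.ModelName, s.Mode, s.LastUsed)
	}
	return nil
}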
func (*Client) Requests ¶ added in v0.1.41
func (c *Client) Requests(modelFilter string, streaming bool, includeExisting bool) (io.ReadCloser, func(), error)
Requests returns a response body and a cancel function to ensure proper cleanup.
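A usage sketch, written as if inside this package (uses io and os); it assumes an empty model filter means all models, and that both the cancel function and the body should be released when done:
// tailRequests streams request logs for every model to stdout until the
// stream ends or an error occurs.
func tailRequests(c *Client) error {
	body, cancel, err := c.Requests("", true /* streaming */, false /* includeExisting */)
	if err != nil {
		return err
	}
	defer cancel()
	defer body.Close()
	_, err = io.Copy(os.Stdout, body)
	return err
}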
func (*Client) Unload ¶ added in v0.1.25
func (c *Client) Unload(req UnloadRequest) (UnloadResponse, error)
type DiskUsage ¶ added in v0.1.25
type DiskUsage struct {
	ModelsDiskUsage         int64 `json:"models_disk_usage"`
	DefaultBackendDiskUsage int64 `json:"default_backend_disk_usage"`
}
DiskUsage is to be imported from docker/model-runner once https://github.com/docker/model-runner/pull/45 is merged.
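A brief sketch of reporting disk usage via DF, written as if inside this package (uses fmt); the values are assumed to be raw byte counts:
// printDiskUsage prints the byte counts reported by DF.
func printDiskUsage(c *Client) error {
	du, err := c.DF()
	if err != nil {
		return err
	}
	fmt.Printf("models: %d bytes, default backend: %d bytes\n",
		du.ModelsDiskUsage, du.DefaultBackendDiskUsage)
	return nil
}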
type DockerHttpClient ¶
type ModelRunnerContext ¶ added in v0.1.20
type ModelRunnerContext struct {
	// contains filtered or unexported fields
}
ModelRunnerContext encodes the operational context of a Model CLI command and provides facilities for inspecting and interacting with the Model Runner.
func DetectContext ¶ added in v0.1.20
DetectContext determines the current Docker Model Runner context.
func NewContextForMock ¶ added in v0.1.20
func NewContextForMock(client DockerHttpClient) *ModelRunnerContext
NewContextForMock is a ModelRunnerContext constructor exposed only for the purposes of mock testing.
func (*ModelRunnerContext) Client ¶ added in v0.1.20
func (c *ModelRunnerContext) Client() DockerHttpClient
Client returns an HTTP client appropriate for accessing the model runner.
func (*ModelRunnerContext) EngineKind ¶ added in v0.1.20
func (c *ModelRunnerContext) EngineKind() types.ModelRunnerEngineKind
EngineKind returns the Docker engine kind associated with the model runner.
func (*ModelRunnerContext) URL ¶ added in v0.1.20
func (c *ModelRunnerContext) URL(path string) string
URL constructs a URL string appropriate for the model runner.
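To illustrate how a detected context is typically consumed, a sketch written as if inside this package (uses fmt); the "/models" path is illustrative rather than a documented endpoint:
// describeContext reports the engine kind, builds a request URL for an
// arbitrary path, and obtains the HTTP client for direct requests.
func describeContext(mrc *ModelRunnerContext) {
	fmt.Println("engine kind:", mrc.EngineKind())
	fmt.Println("models URL: ", mrc.URL("/models")) // illustrative path
	_ = mrc.Client()                                // HTTP client for the model runner
}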
type OpenAIChatMessage ¶
type OpenAIChatRequest ¶
type OpenAIChatRequest struct {
	Model    string              `json:"model"`
	Messages []OpenAIChatMessage `json:"messages"`
	Stream   bool                `json:"stream"`
}
type OpenAIChatResponse ¶
type OpenAIChatResponse struct {
	ID      string `json:"id"`
	Object  string `json:"object"`
	Created int64  `json:"created"`
	Model   string `json:"model"`
	Choices []struct {
		Delta struct {
			Content          string `json:"content"`
			Role             string `json:"role,omitempty"`
			ReasoningContent string `json:"reasoning_content,omitempty"`
		} `json:"delta"`
		Index        int    `json:"index"`
		FinishReason string `json:"finish_reason"`
	} `json:"choices"`
	Usage *struct {
		CompletionTokens int `json:"completion_tokens"`
		PromptTokens     int `json:"prompt_tokens"`
		TotalTokens      int `json:"total_tokens"`
	} `json:"usage,omitempty"`
}
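For illustration, a sketch of decoding one streamed chunk into OpenAIChatResponse and pulling out the text delta; it assumes each chunk arrives as a standalone JSON object (written as if inside this package, uses encoding/json):
// decodeChunk parses a single streamed JSON chunk and returns the text delta
// of the first choice, if any.
func decodeChunk(raw []byte) (string, error) {
	var resp OpenAIChatResponse
	if err := json.Unmarshal(raw, &resp); err != nil {
		return "", err
	}
	if len(resp.Choices) == 0 {
		return "", nil
	}
	return resp.Choices[0].Delta.Content, nil
}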
type ProgressMessage ¶
type ProgressMessage struct {
	Type    string `json:"type"`    // "progress", "success", or "error"
	Message string `json:"message"` // Deprecated: the message should be defined by clients based on Message.Total and Message.Layer
	Total   uint64 `json:"total"`
	Pulled  uint64 `json:"pulled"` // Deprecated: use Layer.Current
	Layer   Layer  `json:"layer"`  // Current layer information
}
ProgressMessage represents a structured message for progress reporting
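As an illustrative sketch, assuming progress updates are delivered as JSON-encoded ProgressMessage values (a framing assumption, not something this package guarantees), a decoder could look like this (written as if inside this package, uses encoding/json and fmt):
// reportProgress decodes one progress update and prints a completion ratio.
// The JSON-line framing is an assumption made for this sketch.
func reportProgress(line []byte) error {
	var msg ProgressMessage
	if err := json.Unmarshal(line, &msg); err != nil {
		return err
	}
	if msg.Type == "error" {
		return fmt.Errorf("pull failed: %s", msg.Message)
	}
	if msg.Total > 0 {
		// Pulled is deprecated in favour of Layer.Current, but Layer's fields
		// are not shown in this documentation, so the aggregate is used here.
		fmt.Printf("%s: %d/%d bytes\n", msg.Type, msg.Pulled, msg.Total)
	}
	return nil
}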
type UnloadRequest ¶ added in v0.1.25
type UnloadRequest struct {
	All     bool     `json:"all"`
	Backend string   `json:"backend"`
	Models  []string `json:"models"`
}
UnloadRequest is to be imported from docker/model-runner once https://github.com/docker/model-runner/pull/46 is merged.
type UnloadResponse ¶ added in v0.1.25
type UnloadResponse struct {
	UnloadedRunners int `json:"unloaded_runners"`
}
UnloadResponse is to be imported from docker/model-runner once https://github.com/docker/model-runner/pull/46 is merged.
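A closing sketch that ties UnloadRequest and UnloadResponse together through Client.Unload, written as if inside this package (uses fmt):
// unloadAll asks the runner to unload every active backend runner and
// reports how many were stopped.
func unloadAll(c *Client) error {
	resp, err := c.Unload(UnloadRequest{All: true})
	if err != nil {
		return err
	}
	fmt.Printf("unloaded %d runner(s)\n", resp.UnloadedRunners)
	return nil
}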