llamacpp

package
v1.0.8 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 11, 2025 License: Apache-2.0 Imports: 27 Imported by: 1

Documentation

Index

Constants

View Source
// Name is the backend name.
const Name = "llama.cpp"
View Source
// UnlimitedContextSize is the sentinel context-size value (-1) used to
// request an unlimited context size.
const UnlimitedContextSize = -1

Variables

View Source
var (
	// ShouldUseGPUVariant reports whether the GPU variant of the server
	// should be used (zero value: false). Presumably guarded by
	// ShouldUseGPUVariantLock, following the Foo/FooLock pairing.
	ShouldUseGPUVariant     bool
	ShouldUseGPUVariantLock sync.Mutex

	// ShouldUpdateServer controls whether the server may be updated
	// (default: true). Presumably guarded by ShouldUpdateServerLock.
	ShouldUpdateServer     = true
	ShouldUpdateServerLock sync.Mutex

	// DesiredServerVersion is the server version to target
	// (default: "latest"). Presumably guarded by DesiredServerVersionLock.
	DesiredServerVersion     = "latest"
	DesiredServerVersionLock sync.Mutex
)

Functions

func CanUseGPU

func CanUseGPU(context.Context, string) (bool, error)

func GetContextSize

func GetContextSize(modelCfg types.Config, backendCfg *inference.BackendConfiguration) *int32

func GetDesiredServerVersion

func GetDesiredServerVersion() string

func GetReasoningBudget added in v1.0.7

func GetReasoningBudget(backendCfg *inference.BackendConfiguration) *int32

func New

func New(
	log logging.Logger,
	modelManager *models.Manager,
	serverLog logging.Logger,
	vendoredServerStoragePath string,
	updatedServerStoragePath string,
	conf config.BackendConfig,
) (inference.Backend, error)

New creates a new llama.cpp-based backend.

func SetDesiredServerVersion

func SetDesiredServerVersion(version string)

Types

type Config

// Config is the configuration for the llama.cpp backend.
type Config struct {
	// Args holds the base arguments that are always included.
	Args []string
}

Config is the configuration for the llama.cpp backend.

func NewDefaultLlamaCppConfig

func NewDefaultLlamaCppConfig() *Config

NewDefaultLlamaCppConfig creates a new LlamaCppConfig with default values.

func (*Config) GetArgs

func (c *Config) GetArgs(bundle types.ModelBundle, socket string, mode inference.BackendMode, config *inference.BackendConfiguration) ([]string, error)

GetArgs implements BackendConfig.GetArgs.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL