config

package

v0.30.0 (latest) · Published: May 7, 2026 · License: Apache-2.0 · Imports: 16 · Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/kombifyio/SpeechKit

Links

Open Source Insights

Documentation ¶

Index ¶

Constants
func ApplyLocalInstallDefaults(cfg *Config, state *InstallState) bool
func ApplyManagedDevServerDefaults(cfg *Config) bool
func ApplyManagedIntegrationDefaults(cfg *Config) bool
func ApplyServerAuthSettings(cfg *Config, auth ServerAuthSettings) []string
func ApplyServerModelSettings(cfg *Config, settings ServerModelSettings) []string
func ApplyServerModelSettingsFile(cfg *Config) ([]string, error)
func ApplyServerRuntimeDefaults(cfg *Config) []string
func HuggingFaceTokenEnvName(cfg *Config) string
func HuggingFaceTokenStatus(cfg *Config) (secrets.TokenStatus, error)
func IsFirstRun() bool
func ManagedDevServerAvailableInBuild() bool
func ManagedHuggingFaceAvailableInBuild() bool
func NormalizeHotkeyBehavior(value, fallback string) string
func NormalizeOverlayFeedbackMode(value, fallback string) string
func NormalizeVoiceAgentCloseBehavior(value, fallback string) string
func OverrideManagedDevServerBuildForTests(value string) func()
func OverrideManagedHuggingFaceBuildForTests(value string) func()
func ResolveHuggingFaceToken(cfg *Config) (string, secrets.TokenStatus, error)
func ResolveSecret(envName string) string
func ResolveSecretFromEnvironmentOrDoppler(envName string) string
func Save(path string, cfg *Config) error
func SaveInstallState(state *InstallState) error
func SaveServerModelSettings(path string, settings ServerModelSettings) error
func ServerSettingsPath(cfg *Config) string
func ValidateServerProductionAuth(cfg *Config) error
type AssistConfig
type AudioConfig
type Config
- func Load(path string) (*Config, error)
- func (cfg *Config) LegacyAgentHotkey() string
type FeedbackConfig
type GeneralConfig
- func (g GeneralConfig) LegacyAgentHotkey() string
type GoogleProviderConfig
type GroqProviderConfig
type HuggingFaceConfig
type InstallMode
type InstallState
- func LoadInstallState() (*InstallState, error)
type LocalConfig
type LocalLLMConfig
type ModeModelSelection
- func (sel ModeModelSelection) ResolvedModeSource() string
type ModelSelectionConfig
- func BuiltInPrimaryModelSelectionDefaults() ModelSelectionConfig
type OllamaProviderConfig
type OpenAIProviderConfig
type OpenRouterProviderConfig
type OverlayFreePosition
type PersonaConfig
type ProvidersConfig
type RoleConfig
type RoutingConfig
type SequenceConfig
type SequenceStepConfig
type ServerAssistSettings
type ServerAuthSettings
type ServerConfig
type ServerConnectionConfig
type ServerCredentialSettings
type ServerDictationSettings
type ServerFeaturesConfig
type ServerLLMSettings
type ServerModeProviderSettings
type ServerModeSetting
type ServerModelSettings
- func LoadServerModelSettings(path string) (ServerModelSettings, bool, error)
- func NormalizeServerModelSettings(settings ServerModelSettings) ServerModelSettings
- func SanitizeServerModelSettings(settings ServerModelSettings) ServerModelSettings
type ServerOptionalTTSSettings
type ServerProviderCredentialSettings
type ServerSTTSettings
type ServerVoiceAgentSettings
type ShortcutLocaleConfig
type ShortcutsConfig
type StoreConfig
type TTSConfig
type TTSGoogle
type TTSHuggingFace
type TTSLocal
type TTSOpenAI
type UIConfig
type VPSConfig
type VocabularyConfig
type VoiceAgentConfig

Constants ¶

View Source

// String values for hotkey, Voice Agent close-behavior and overlay feedback
// settings, followed by built-in local defaults and default profile IDs.
const (
	// Hotkey behavior values.
	HotkeyBehaviorPushToTalk = "push_to_talk"
	HotkeyBehaviorToggle     = "toggle"

	// Voice Agent close-behavior values.
	VoiceAgentCloseBehaviorContinue = "continue"
	VoiceAgentCloseBehaviorNewChat  = "new_chat"

	// Overlay feedback mode values.
	OverlayFeedbackModeBigProductivity = "big_productivity"
	OverlayFeedbackModeSmallFeedback   = "small_feedback"

	// Default local LLM base URL and model, default local STT model file,
	// and the managed dev server URL.
	DefaultLocalLLMBaseURL = "http://127.0.0.1:8082/v1"
	DefaultLocalLLMModel   = "ggml-org/gemma-4-E4B-it-GGUF:Q4_K_M"
	DefaultLocalSTTModel   = "ggml-large-v3-turbo.bin"
	ManagedDevServerURL    = "https://speechkit.kombify.dev"

	// Default primary profile IDs for the Dictate, Assist and Voice Agent modes.
	DefaultDictatePrimaryProfileID    = "stt.local.whispercpp"
	DefaultAssistPrimaryProfileID     = "assist.builtin.gemma4-e4b"
	DefaultVoiceAgentPrimaryProfileID = "realtime.builtin.pipeline"
)

View Source

// Values for ModeModelSelection.ModeSource. An empty ModeSource is treated as
// ModeSourceLocal.
const (
	ModeSourceLocal  = "local"
	ModeSourceServer = "server"
)

Mode source values for ModeModelSelection.ModeSource. "local" means the desktop app runs the mode against the in-process Framework kernel (default, preserves all pre-0.26 behaviour). "server" routes the mode through ServerConnection to a remote speechkit-server.

View Source

// Environment variable names for the server settings file and onboarding UI,
// followed by server auth mode values.
const (
	// Environment variable names.
	ServerSettingsPathEnv  = "SPEECHKIT_SERVER_SETTINGS_PATH"
	ServerSettingsWriteEnv = "SPEECHKIT_SERVER_SETTINGS_WRITE"
	ServerOnboardingUIEnv  = "SPEECHKIT_SERVER_ONBOARDING_UI"

	// Auth mode values.
	// NOTE(review): presumably the values of ServerAuthSettings.Mode; they
	// differ from the auth_mode values listed on ServerConfig.AuthMode — confirm.
	ServerAuthModeManagedBearer = "managed_bearer"
	ServerAuthModeSelfManaged   = "self_managed"
)

View Source

// AllowInsecureNoAuthEnv is the environment variable name
// SPEECHKIT_ALLOW_INSECURE_NO_AUTH.
// NOTE(review): presumably the explicit opt-out consulted by
// ValidateServerProductionAuth — confirm against its implementation.
const AllowInsecureNoAuthEnv = "SPEECHKIT_ALLOW_INSECURE_NO_AUTH"

View Source

// ServerSelfHostedDefaultsEnv is the environment variable name
// SPEECHKIT_SELFHOSTED_DEFAULTS, the opt-in switch described on
// ApplyServerRuntimeDefaults.
const (
	ServerSelfHostedDefaultsEnv = "SPEECHKIT_SELFHOSTED_DEFAULTS"
)

Variables ¶

This section is empty.

Functions ¶

func ApplyLocalInstallDefaults ¶ added in v0.14.1

func ApplyLocalInstallDefaults(cfg *Config, state *InstallState) bool

ApplyLocalInstallDefaults keeps a pending local install local-first while the onboarding download flow prepares the selected Whisper model.

func ApplyManagedDevServerDefaults ¶ added in v0.28.2

func ApplyManagedDevServerDefaults(cfg *Config) bool

func ApplyManagedIntegrationDefaults ¶

func ApplyManagedIntegrationDefaults(cfg *Config) bool

func ApplyServerAuthSettings ¶ added in v0.28.2

func ApplyServerAuthSettings(cfg *Config, auth ServerAuthSettings) []string

func ApplyServerModelSettings ¶ added in v0.28.0

func ApplyServerModelSettings(cfg *Config, settings ServerModelSettings) []string

func ApplyServerModelSettingsFile ¶ added in v0.28.0

func ApplyServerModelSettingsFile(cfg *Config) ([]string, error)

func ApplyServerRuntimeDefaults ¶ added in v0.28.0

func ApplyServerRuntimeDefaults(cfg *Config) []string

ApplyServerRuntimeDefaults turns the standalone Linux Server-Target into a working self-hosted deployment when SPEECHKIT_SELFHOSTED_DEFAULTS is set. Desktop code never calls this; it is intentionally opt-in for server containers that ship local STT/LLM sidecars.

func HuggingFaceTokenEnvName ¶

func HuggingFaceTokenEnvName(cfg *Config) string

func HuggingFaceTokenStatus ¶

func HuggingFaceTokenStatus(cfg *Config) (secrets.TokenStatus, error)

func IsFirstRun ¶

func IsFirstRun() bool

IsFirstRun returns true if no install state file exists.

func ManagedDevServerAvailableInBuild ¶ added in v0.28.2

func ManagedDevServerAvailableInBuild() bool

func ManagedHuggingFaceAvailableInBuild ¶ added in v0.14.6

func ManagedHuggingFaceAvailableInBuild() bool

func NormalizeHotkeyBehavior ¶ added in v0.21.1

func NormalizeHotkeyBehavior(value, fallback string) string

func NormalizeOverlayFeedbackMode ¶ added in v0.22.4

func NormalizeOverlayFeedbackMode(value, fallback string) string

func NormalizeVoiceAgentCloseBehavior ¶ added in v0.21.1

func NormalizeVoiceAgentCloseBehavior(value, fallback string) string

func OverrideManagedDevServerBuildForTests ¶ added in v0.28.2

func OverrideManagedDevServerBuildForTests(value string) func()

func OverrideManagedHuggingFaceBuildForTests ¶ added in v0.14.6

func OverrideManagedHuggingFaceBuildForTests(value string) func()

func ResolveHuggingFaceToken ¶

func ResolveHuggingFaceToken(cfg *Config) (string, secrets.TokenStatus, error)

func ResolveSecret ¶

func ResolveSecret(envName string) string

ResolveSecret resolves a secret by name. It checks the environment first, then falls back to the Doppler CLI, using either the explicit DOPPLER_PROJECT/DOPPLER_CONFIG env vars or the build-embedded managed Doppler defaults.

func ResolveSecretFromEnvironmentOrDoppler ¶ added in v0.14.8

func ResolveSecretFromEnvironmentOrDoppler(envName string) string

func Save ¶

func Save(path string, cfg *Config) error

func SaveInstallState ¶

func SaveInstallState(state *InstallState) error

SaveInstallState writes the install state to disk.

func SaveServerModelSettings ¶ added in v0.28.0

func SaveServerModelSettings(path string, settings ServerModelSettings) error

func ServerSettingsPath ¶ added in v0.28.0

func ServerSettingsPath(cfg *Config) string

func ValidateServerProductionAuth ¶ added in v0.29.0

func ValidateServerProductionAuth(cfg *Config) error

ValidateServerProductionAuth rejects accidental public no-auth server binds. auth_mode=none remains available for local development and explicit tests.

Types ¶

type AssistConfig ¶ added in v0.28.0

// AssistConfig is the [assist] table of Config. It carries the list stored
// under enabled_tools.
// NOTE(review): the meaning of an empty list is not visible here — confirm.
type AssistConfig struct {
	EnabledTools []string `toml:"enabled_tools"`
}

type AudioConfig ¶

// AudioConfig is the [audio] table of Config: audio backend, input and output
// device IDs, sample rate, channel count, frame size in milliseconds and a
// latency hint.
type AudioConfig struct {
	Backend        string `toml:"backend"`
	DeviceID       string `toml:"device_id"`
	OutputDeviceID string `toml:"output_device_id"`
	SampleRate     int    `toml:"sample_rate"`
	Channels       int    `toml:"channels"`
	FrameSizeMs    int    `toml:"frame_size_ms"`
	LatencyHint    string `toml:"latency_hint"`
}

type Config ¶

// Config is the root of the TOML configuration. Each field maps to the table
// or list named in its toml tag. Load returns a *Config and Save writes one.
type Config struct {
	General        GeneralConfig        `toml:"general"`
	Audio          AudioConfig          `toml:"audio"`
	UI             UIConfig             `toml:"ui"`
	Vocabulary     VocabularyConfig     `toml:"vocabulary"`
	Assist         AssistConfig         `toml:"assist"`
	Shortcuts      ShortcutsConfig      `toml:"shortcuts"`
	ModelSelection ModelSelectionConfig `toml:"model_selection"`

	// ServerConnection points the device/local-target at a remote SpeechKit
	// Server-Target. Only consulted when at least one mode in ModelSelection
	// has mode_source = "server". Disabled by default; the desktop app runs
	// fully self-contained until a user opts a mode into server-side
	// execution (typically via onboarding or settings).
	ServerConnection ServerConnectionConfig `toml:"server_connection"`

	Local       LocalConfig       `toml:"local"`
	LocalLLM    LocalLLMConfig    `toml:"local_llm"`
	VPS         VPSConfig         `toml:"vps"`
	HuggingFace HuggingFaceConfig `toml:"huggingface"`
	Routing     RoutingConfig     `toml:"routing"`
	Feedback    FeedbackConfig    `toml:"feedback"` // legacy compat; prefer Store
	Store       StoreConfig       `toml:"store"`
	Providers   ProvidersConfig   `toml:"providers"`
	TTS         TTSConfig         `toml:"tts"`
	VoiceAgent  VoiceAgentConfig  `toml:"voice_agent"`

	// Server configures the standalone Linux server binary (cmd/speechkit-server).
	// All fields are optional; the desktop app (cmd/speechkit) ignores them entirely.
	Server    ServerConfig     `toml:"server"`
	Personas  []PersonaConfig  `toml:"personas"`
	Roles     []RoleConfig     `toml:"roles"`
	Sequences []SequenceConfig `toml:"sequences"`
}

func Load ¶

func Load(path string) (*Config, error)

Load reads the config from the given path. It falls back to defaults if the file is not found.

func (*Config) LegacyAgentHotkey ¶ added in v0.19.0

func (cfg *Config) LegacyAgentHotkey() string

type FeedbackConfig ¶

// FeedbackConfig is the legacy [feedback] table of Config. Config marks it as
// "legacy compat; prefer Store"; StoreConfig carries the same save_audio,
// audio_retention_days and max_audio_storage_mb settings.
type FeedbackConfig struct {
	SaveAudio          bool   `toml:"save_audio"`
	AudioRetentionDays int    `toml:"audio_retention_days"`
	DBPath             string `toml:"db_path"`
	MaxAudioStorageMB  int    `toml:"max_audio_storage_mb"`
}

type GeneralConfig ¶

// GeneralConfig is the [general] table of Config: language, per-mode hotkeys
// with their behaviors and enable flags, silence thresholds and the model
// download directory. Hotkey, ActiveMode and HotkeyMode are legacy fields
// kept for config file compatibility.
type GeneralConfig struct {
	Language                 string `toml:"language"`
	Hotkey                   string `toml:"hotkey"` // Deprecated: legacy single-hotkey field kept for config file compat. Use DictateHotkey.
	DictateHotkey            string `toml:"dictate_hotkey"`
	AssistHotkey             string `toml:"assist_hotkey"`
	VoiceAgentHotkey         string `toml:"voice_agent_hotkey"`
	DictateHotkeyBehavior    string `toml:"dictate_hotkey_behavior"`
	AssistHotkeyBehavior     string `toml:"assist_hotkey_behavior"`
	VoiceAgentHotkeyBehavior string `toml:"voice_agent_hotkey_behavior"`
	DictateEnabled           bool   `toml:"dictate_enabled"`
	AssistEnabled            bool   `toml:"assist_enabled"`
	VoiceAgentEnabled        bool   `toml:"voice_agent_enabled"`
	AutoStartOnLaunch        bool   `toml:"auto_start_on_launch"`
	AgentHotkey              string `toml:"agent_hotkey"`
	AgentMode                string `toml:"agent_mode"`  // "assist" or "voice_agent" — determines what agent_hotkey triggers
	ActiveMode               string `toml:"active_mode"` // legacy compat
	HotkeyMode               string `toml:"hotkey_mode"` // legacy compat for single behavior setting
	AutoStopSilenceMs        int    `toml:"auto_stop_silence_ms"`
	FastModeSilenceMs        int    `toml:"fast_mode_silence_ms"` // silence threshold for Quick Capture auto-stop
	ModelDownloadDir         string `toml:"model_download_dir"`   // Default directory for downloaded local model files
}

func (GeneralConfig) LegacyAgentHotkey ¶ added in v0.19.0

func (g GeneralConfig) LegacyAgentHotkey() string

type GoogleProviderConfig ¶

// GoogleProviderConfig is the [providers.google] table: an enable flag, the
// api_key_env setting and model names for STT, utility, assist and agent use.
// NOTE(review): APIKeyEnv presumably names the environment variable holding
// the key rather than the key itself — confirm.
type GoogleProviderConfig struct {
	Enabled      bool   `toml:"enabled"`
	APIKeyEnv    string `toml:"api_key_env"`
	STTModel     string `toml:"stt_model"`
	UtilityModel string `toml:"utility_model"`
	AssistModel  string `toml:"assist_model"`
	AgentModel   string `toml:"agent_model"`
}

type GroqProviderConfig ¶

// GroqProviderConfig is the [providers.groq] table: an enable flag, the
// api_key_env setting and model names for STT, utility, assist and agent use.
// NOTE(review): APIKeyEnv presumably names the environment variable holding
// the key rather than the key itself — confirm.
type GroqProviderConfig struct {
	Enabled      bool   `toml:"enabled"`
	APIKeyEnv    string `toml:"api_key_env"`
	STTModel     string `toml:"stt_model"`
	UtilityModel string `toml:"utility_model"`
	AssistModel  string `toml:"assist_model"`
	AgentModel   string `toml:"agent_model"`
}

type HuggingFaceConfig ¶

// HuggingFaceConfig is the [huggingface] table of Config: an enable flag,
// model names (default, utility, assist, agent) and the token_env setting.
// NOTE(review): TokenEnv presumably feeds HuggingFaceTokenEnvName — confirm.
type HuggingFaceConfig struct {
	Enabled      bool   `toml:"enabled"`
	Model        string `toml:"model"`
	UtilityModel string `toml:"utility_model"`
	AssistModel  string `toml:"assist_model"`
	AgentModel   string `toml:"agent_model"`
	TokenEnv     string `toml:"token_env"`
}

type InstallMode ¶

// InstallMode is a string-typed install mode; its declared values are the
// InstallMode* constants.
type InstallMode string

InstallMode defines whether SpeechKit runs locally or connected to an external host.

// InstallMode values. InstallModeNotSet is the empty string, i.e. the zero
// value of InstallMode.
const (
	InstallModeLocal  InstallMode = "local"
	InstallModeCloud  InstallMode = "cloud"
	InstallModeNotSet InstallMode = ""
)

type InstallState ¶

// InstallState is the TOML-encoded install record: the chosen InstallMode,
// whether setup is done, and a device ID. LoadInstallState returns one and
// SaveInstallState writes one.
type InstallState struct {
	Mode      InstallMode `toml:"mode"`
	SetupDone bool        `toml:"setup_done"`
	DeviceID  string      `toml:"device_id"`
}

InstallState persists the user's install mode choice and device identity. Stored in %APPDATA%/SpeechKit/install.toml, separate from config.toml.

func LoadInstallState ¶

func LoadInstallState() (*InstallState, error)

LoadInstallState reads the install state from disk. It returns a default state (empty mode) if the file doesn't exist.

type LocalConfig ¶

// LocalConfig is the [local] table of Config: an enable flag, model name,
// model path, port and GPU setting.
// NOTE(review): presumably the local STT (whisper.cpp) runtime, given
// DefaultLocalSTTModel — confirm.
type LocalConfig struct {
	Enabled   bool   `toml:"enabled"`
	Model     string `toml:"model"`
	ModelPath string `toml:"model_path"`
	Port      int    `toml:"port"`
	GPU       string `toml:"gpu"`
}

type LocalLLMConfig ¶ added in v0.22.1

// LocalLLMConfig is the [local_llm] table of Config: an enable flag, base
// URL, model name and path, port, GPU setting and per-purpose model names
// (utility, assist, agent).
// NOTE(review): presumably defaulted from DefaultLocalLLMBaseURL and
// DefaultLocalLLMModel — confirm.
type LocalLLMConfig struct {
	Enabled      bool   `toml:"enabled"`
	BaseURL      string `toml:"base_url"`
	Model        string `toml:"model"`
	ModelPath    string `toml:"model_path"`
	Port         int    `toml:"port"`
	GPU          string `toml:"gpu"`
	UtilityModel string `toml:"utility_model"`
	AssistModel  string `toml:"assist_model"`
	AgentModel   string `toml:"agent_model"`
}

type ModeModelSelection ¶ added in v0.21.1

// ModeModelSelection holds the primary and fallback profile IDs and the
// execution source for a single mode. It is the element type of the fields
// of ModelSelectionConfig.
type ModeModelSelection struct {
	PrimaryProfileID  string `toml:"primary_profile_id"`
	FallbackProfileID string `toml:"fallback_profile_id"`

	// ModeSource selects whether this mode runs locally (Framework kernel
	// in-process, default) or against a remote SpeechKit Server-Target
	// configured under [server_connection]. Empty string is treated as
	// ModeSourceLocal so existing configs keep behaving as before.
	ModeSource string `toml:"mode_source"`
}

func (ModeModelSelection) ResolvedModeSource ¶ added in v0.26.0

func (sel ModeModelSelection) ResolvedModeSource() string

ResolvedModeSource returns the effective ModeSource for this mode, normalising the empty default to ModeSourceLocal. Use this everywhere instead of reading sel.ModeSource directly so a missing TOML field does not silently mean "server".

type ModelSelectionConfig ¶ added in v0.21.1

// ModelSelectionConfig is the [model_selection] table of Config, with one
// ModeModelSelection each for the dictate, assist and voice_agent modes.
type ModelSelectionConfig struct {
	Dictate    ModeModelSelection `toml:"dictate"`
	Assist     ModeModelSelection `toml:"assist"`
	VoiceAgent ModeModelSelection `toml:"voice_agent"`
}

func BuiltInPrimaryModelSelectionDefaults ¶ added in v0.22.4

func BuiltInPrimaryModelSelectionDefaults() ModelSelectionConfig

type OllamaProviderConfig ¶

// OllamaProviderConfig is the [providers.ollama] table: an enable flag, a
// base URL and model names for STT, utility, assist and agent use. Unlike
// the other provider tables it has no api_key_env field.
type OllamaProviderConfig struct {
	Enabled      bool   `toml:"enabled"`
	BaseURL      string `toml:"base_url"`
	STTModel     string `toml:"stt_model"`
	UtilityModel string `toml:"utility_model"`
	AssistModel  string `toml:"assist_model"`
	AgentModel   string `toml:"agent_model"`
}

type OpenAIProviderConfig ¶

// OpenAIProviderConfig is the [providers.openai] table: an enable flag, the
// api_key_env setting, model names for STT, utility, assist and agent use,
// plus TTS model/voice and a realtime model.
// NOTE(review): APIKeyEnv presumably names the environment variable holding
// the key rather than the key itself — confirm.
type OpenAIProviderConfig struct {
	Enabled       bool   `toml:"enabled"`
	APIKeyEnv     string `toml:"api_key_env"`
	STTModel      string `toml:"stt_model"`
	UtilityModel  string `toml:"utility_model"`
	AssistModel   string `toml:"assist_model"`
	AgentModel    string `toml:"agent_model"`
	TTSModel      string `toml:"tts_model"`
	TTSVoice      string `toml:"tts_voice"`
	RealtimeModel string `toml:"realtime_model"`
}

type OpenRouterProviderConfig ¶ added in v0.15.0

// OpenRouterProviderConfig is the [providers.openrouter] table: an enable
// flag, the api_key_env setting and model names for STT, utility, assist and
// agent use.
// NOTE(review): APIKeyEnv presumably names the environment variable holding
// the key rather than the key itself — confirm.
type OpenRouterProviderConfig struct {
	Enabled      bool   `toml:"enabled"`
	APIKeyEnv    string `toml:"api_key_env"`
	STTModel     string `toml:"stt_model"`
	UtilityModel string `toml:"utility_model"`
	AssistModel  string `toml:"assist_model"`
	AgentModel   string `toml:"agent_model"`
}

type OverlayFreePosition ¶ added in v0.19.0

// OverlayFreePosition is an integer X/Y pair. It is the value type of
// UIConfig.OverlayMonitorPositions.
// NOTE(review): units and coordinate origin are not visible here — confirm.
type OverlayFreePosition struct {
	X int `toml:"x"`
	Y int `toml:"y"`
}

type PersonaConfig ¶ added in v0.26.0

// PersonaConfig is one entry of the personas list in Config. DefaultRole and
// DefaultSequence are string references.
// NOTE(review): presumably they hold RoleConfig.ID and SequenceConfig.ID
// values — confirm.
type PersonaConfig struct {
	ID              string            `toml:"id"`
	DisplayName     string            `toml:"display_name"`
	Description     string            `toml:"description"`
	Voice           string            `toml:"voice"`
	Locale          string            `toml:"locale"`
	DefaultRole     string            `toml:"default_role"`
	DefaultSequence string            `toml:"default_sequence"`
	Tags            []string          `toml:"tags"`
	Metadata        map[string]string `toml:"metadata"`
}

PersonaConfig is a TOML-seeded Voice Agent persona. DB entries with the same ID override the TOML seed at runtime.

type ProvidersConfig ¶

// ProvidersConfig is the [providers] table of Config, with one sub-table per
// provider: openai, groq, google, ollama and openrouter.
type ProvidersConfig struct {
	OpenAI     OpenAIProviderConfig     `toml:"openai"`
	Groq       GroqProviderConfig       `toml:"groq"`
	Google     GoogleProviderConfig     `toml:"google"`
	Ollama     OllamaProviderConfig     `toml:"ollama"`
	OpenRouter OpenRouterProviderConfig `toml:"openrouter"`
}

ProvidersConfig groups all external provider configurations.

type RoleConfig ¶ added in v0.26.0

// RoleConfig is one entry of the roles list in Config. Its fields cover
// prompts, locale and vocabulary hint, a tool allowlist, temperature,
// thinking options, voice activity detection (VAD) tuning, activity/turn
// handling, context compression thresholds and affective dialog.
// Note that the context compression fields use the toml keys
// context_compression_trigger_tokens / context_compression_target_tokens,
// while the Go field names abbreviate "Tokens" to "Tk".
type RoleConfig struct {
	ID                          string   `toml:"id"`
	DisplayName                 string   `toml:"display_name"`
	SystemPrompt                string   `toml:"system_prompt"`
	RefinementPrompt            string   `toml:"refinement_prompt"`
	Locale                      string   `toml:"locale"`
	VocabularyHint              string   `toml:"vocabulary_hint"`
	ToolAllowlist               []string `toml:"tool_allowlist"`
	Temperature                 float64  `toml:"temperature"`
	ThinkingEnabled             bool     `toml:"thinking_enabled"`
	ThinkingLevel               string   `toml:"thinking_level"`
	IncludeThoughts             bool     `toml:"include_thoughts"`
	ThinkingBudget              int      `toml:"thinking_budget"`
	AutomaticActivityDetection  bool     `toml:"automatic_activity_detection"`
	VADStartSensitivity         string   `toml:"vad_start_sensitivity"`
	VADEndSensitivity           string   `toml:"vad_end_sensitivity"`
	VADPrefixPaddingMs          int      `toml:"vad_prefix_padding_ms"`
	VADSilenceDurationMs        int      `toml:"vad_silence_duration_ms"`
	ActivityHandling            string   `toml:"activity_handling"`
	TurnCoverage                string   `toml:"turn_coverage"`
	ContextCompressionEnabled   bool     `toml:"context_compression_enabled"`
	ContextCompressionTriggerTk int64    `toml:"context_compression_trigger_tokens"`
	ContextCompressionTargetTk  int64    `toml:"context_compression_target_tokens"`
	EnableAffectiveDialog       bool     `toml:"enable_affective_dialog"`
}

RoleConfig is a TOML-seeded Voice Agent role. Roles are referenced from Personas via ID and compose the LiveConfig prompt layers.

type RoutingConfig ¶

// RoutingConfig is the [routing] table of Config: a strategy name, a
// prefer-local threshold in seconds, and the parallel_cloud and
// replace_on_better flags.
// NOTE(review): the accepted strategy values are not visible here — confirm.
type RoutingConfig struct {
	Strategy                string  `toml:"strategy"`
	PreferLocalUnderSeconds float64 `toml:"prefer_local_under_seconds"`
	ParallelCloud           bool    `toml:"parallel_cloud"`
	ReplaceOnBetter         bool    `toml:"replace_on_better"`
}

type SequenceConfig ¶ added in v0.26.0

// SequenceConfig is one entry of the sequences list in Config: identifying
// fields, a completion rule, a turn limit and an ordered list of steps.
type SequenceConfig struct {
	ID          string               `toml:"id"`
	DisplayName string               `toml:"display_name"`
	Description string               `toml:"description"`
	Completion  string               `toml:"completion"` // "all_steps" | "explicit_close" | "max_turns"
	MaxTurns    int                  `toml:"max_turns"`
	Steps       []SequenceStepConfig `toml:"steps"`
}

SequenceConfig is a TOML-seeded multi-step Voice Agent workflow.

type SequenceStepConfig ¶ added in v0.26.0

// SequenceStepConfig is one element of SequenceConfig.Steps: an ID, an
// instruction, exit criteria, a list of required tools and a turn limit.
type SequenceStepConfig struct {
	ID           string   `toml:"id"`
	Instruction  string   `toml:"instruction"`
	ExitCriteria string   `toml:"exit_criteria"`
	RequireTools []string `toml:"require_tools"`
	MaxTurns     int      `toml:"max_turns"`
}

SequenceStepConfig is a single step inside a SequenceConfig.

type ServerAssistSettings ¶ added in v0.28.0

// ServerAssistSettings is the JSON "assist" section of ServerModelSettings.
// It carries the enabled_tools list, which is omitted from JSON when empty.
type ServerAssistSettings struct {
	EnabledTools []string `json:"enabled_tools,omitempty"`
}

type ServerAuthSettings ¶ added in v0.28.2

// ServerAuthSettings is the JSON "server_auth" section of
// ServerModelSettings and the argument type of ApplyServerAuthSettings.
// NOTE(review): GenerateToken is a *bool, presumably so that "unset" can be
// told apart from an explicit false — confirm.
// NOTE(review): TokenValue appears to hold a token in clear text; check how
// SanitizeServerModelSettings treats it before exposing this struct.
type ServerAuthSettings struct {
	Mode           string `json:"mode,omitempty"`
	BearerTokenEnv string `json:"bearer_token_env,omitempty"`
	GenerateToken  *bool  `json:"generate_token,omitempty"`
	TokenValue     string `json:"token_value,omitempty"`
}

type ServerConfig ¶ added in v0.26.0

// ServerConfig is the [server] table of Config. It covers the listen
// address, public URL, enabled modes, authentication, CORS, rate limiting,
// upload and session limits, the whisper.cpp sidecar, the model directory,
// logging and feature flags.
type ServerConfig struct {
	ListenAddr            string   `toml:"listen_addr"`          // e.g. ":8080"
	PublicURL             string   `toml:"public_url"`           // external API base URL, e.g. https://speechkit.example.com/api
	Modes                 []string `toml:"modes"`                // subset of ["dictation","assist","voiceagent"]; empty = all
	AuthMode              string   `toml:"auth_mode"`            // "none" | "bearer" | "edge_hmac" | "bearer_or_edge"
	BearerTokenEnv        string   `toml:"bearer_token_env"`     // env var name holding the bearer token
	BearerRole            string   `toml:"bearer_role"`          // optional role for static bearer callers, e.g. "admin"
	EdgeAuthSecretEnv     string   `toml:"edge_auth_secret_env"` // env var name holding the HMAC secret
	CORSAllowedOrigins    []string `toml:"cors_allowed_origins"`
	RateLimitRPS          float64  `toml:"rate_limit_rps"`
	RateLimitBurst        int      `toml:"rate_limit_burst"`
	MaxUploadMB           int      `toml:"max_upload_mb"`
	MaxVoiceAgentSessions int      `toml:"max_voiceagent_sessions"` // global cap
	MaxSessionsPerUser    int      `toml:"max_sessions_per_user"`
	TicketTTLSec          int      `toml:"ticket_ttl_sec"` // Voice Agent WS ticket TTL
	// VoiceAgentIdleTimeoutSec terminates a Voice Agent WebSocket session
	// after N seconds without any client- or provider-side activity.
	// Defaults to 900 (15 min). Set to 0 to disable the server-side idle
	// timeout (kernel-level idle handling stays in effect either way).
	VoiceAgentIdleTimeoutSec int                  `toml:"voiceagent_idle_timeout_sec"`
	WhisperBinary            string               `toml:"whisper_binary"` // absolute path inside container
	WhisperPort              int                  `toml:"whisper_port"`   // loopback port for whisper.cpp server
	ModelDir                 string               `toml:"model_dir"`      // persistent volume, e.g. /var/lib/speechkit/models
	LogFormat                string               `toml:"log_format"`     // "json" | "text"
	LogLevel                 string               `toml:"log_level"`      // "debug" | "info" | "warn" | "error"
	Features                 ServerFeaturesConfig `toml:"features"`
}

ServerConfig configures the standalone Linux server binary. Used only by cmd/speechkit-server; the desktop app never reads these values.

type ServerConnectionConfig ¶ added in v0.26.0

// ServerConnectionConfig is the [server_connection] table of Config: an
// enable gate, the server base URL, the name of the env var holding the
// bearer token, a fall-back-to-local flag and a request timeout in seconds.
type ServerConnectionConfig struct {
	// Enabled gates the entire server connection. When false, every mode is
	// forced to run locally regardless of its mode_source. Lets users keep
	// their server URL in config but temporarily flip back to fully local.
	Enabled bool `toml:"enabled"`

	// URL is the base URL of the speechkit-server, e.g.
	// "https://speechkit.example.com" or "http://localhost:8080".
	URL string `toml:"url"`

	// BearerTokenEnv names the env var that holds the bearer token sent in
	// the Authorization header. Defaults to SPEECHKIT_SERVER_TOKEN. The
	// value is never read from the TOML file itself — only the env var name
	// is configured here.
	BearerTokenEnv string `toml:"bearer_token_env"`

	// FallbackToLocal makes the device app fall back to the in-process
	// Framework kernel if a server call fails or the server is unreachable.
	// Useful for laptop deployments that may be offline; should be false
	// for kiosks that must never silently downgrade to local processing.
	FallbackToLocal bool `toml:"fallback_to_local"`

	// RequestTimeoutSec caps non-streaming HTTP calls (Dictation, Assist).
	// 0 means no explicit timeout (the underlying http.Client default
	// applies). Voice Agent WebSocket sessions are not affected.
	RequestTimeoutSec int `toml:"request_timeout_sec"`
}

ServerConnectionConfig describes how the device/local-target reaches a remote SpeechKit server. Read by cmd/speechkit (and any embedded library caller) when a ModeModelSelection has mode_source = "server"; the Server-Target itself ignores this section.

type ServerCredentialSettings ¶ added in v0.28.0

// ServerCredentialSettings is the JSON "credentials" section of
// ServerModelSettings, with one ServerProviderCredentialSettings per
// provider.
// NOTE(review): every field is a struct value, and encoding/json's omitempty
// never omits struct values, so all five keys are always emitted. Use
// pointers or omitzero (Go 1.24+) if omission is intended.
type ServerCredentialSettings struct {
	OpenAI      ServerProviderCredentialSettings `json:"openai,omitempty"`
	Groq        ServerProviderCredentialSettings `json:"groq,omitempty"`
	Google      ServerProviderCredentialSettings `json:"google,omitempty"`
	HuggingFace ServerProviderCredentialSettings `json:"huggingface,omitempty"`
	OpenRouter  ServerProviderCredentialSettings `json:"openrouter,omitempty"`
}

type ServerDictationSettings ¶ added in v0.28.0

// ServerDictationSettings is the JSON "dictation" section of
// ServerModelSettings. Dictionary is a *string, so a nil pointer is omitted
// from JSON while a pointer to "" is still written.
type ServerDictationSettings struct {
	Dictionary *string `json:"dictionary,omitempty"`
}

type ServerFeaturesConfig ¶ added in v0.30.0

// ServerFeaturesConfig is the [server.features] table: boolean flags for
// catalog, storage_reads, vocabulary and tts_direct.
// NOTE(review): what each flag enables is not visible here — confirm.
type ServerFeaturesConfig struct {
	Catalog      bool `toml:"catalog"`
	StorageReads bool `toml:"storage_reads"`
	Vocabulary   bool `toml:"vocabulary"`
	TTSDirect    bool `toml:"tts_direct"`
}

type ServerLLMSettings ¶ added in v0.28.0

// ServerLLMSettings is the JSON "llm" section of ServerModelSettings: an
// optional enable flag, base URL, per-purpose model names and an hf_repo
// value. All fields are omitted from JSON when empty or nil.
type ServerLLMSettings struct {
	Enabled      *bool  `json:"enabled,omitempty"`
	BaseURL      string `json:"base_url,omitempty"`
	UtilityModel string `json:"utility_model,omitempty"`
	AssistModel  string `json:"assist_model,omitempty"`
	AgentModel   string `json:"agent_model,omitempty"`
	HFRepo       string `json:"hf_repo,omitempty"`
}

type ServerModeProviderSettings ¶ added in v0.28.0

// ServerModeProviderSettings is the JSON "modes" section of
// ServerModelSettings, with one ServerModeSetting each for dictation, assist
// and voice_agent.
// NOTE(review): the fields are struct values, and encoding/json's omitempty
// never omits struct values, so all three keys are always emitted.
type ServerModeProviderSettings struct {
	Dictation  ServerModeSetting `json:"dictation,omitempty"`
	Assist     ServerModeSetting `json:"assist,omitempty"`
	VoiceAgent ServerModeSetting `json:"voice_agent,omitempty"`
}

type ServerModeSetting ¶ added in v0.28.0

// ServerModeSetting is the per-mode entry of ServerModeProviderSettings: an
// optional enable flag, a provider kind, a profile ID and a model name.
type ServerModeSetting struct {
	Enabled      *bool  `json:"enabled,omitempty"`
	ProviderKind string `json:"provider_kind,omitempty"`
	ProfileID    string `json:"profile_id,omitempty"`
	Model        string `json:"model,omitempty"`
}

type ServerModelSettings ¶ added in v0.28.0

// ServerModelSettings is the root of the JSON server settings document.
// LoadServerModelSettings returns one, SaveServerModelSettings writes one and
// ApplyServerModelSettings applies one to a Config.
// NOTE(review): the struct-valued fields (ServerAuth through TTS) carry
// omitempty, which encoding/json ignores for struct values; those keys are
// always present in the output. Use pointers or omitzero (Go 1.24+) if
// omission is intended.
type ServerModelSettings struct {
	Version            int                        `json:"version,omitempty"`
	OnboardingComplete bool                       `json:"onboarding_complete,omitempty"`
	OnboardingVersion  string                     `json:"onboarding_version,omitempty"`
	ServerAuth         ServerAuthSettings         `json:"server_auth,omitempty"`
	Modes              ServerModeProviderSettings `json:"modes,omitempty"`
	Credentials        ServerCredentialSettings   `json:"credentials,omitempty"`
	Dictation          ServerDictationSettings    `json:"dictation,omitempty"`
	Assist             ServerAssistSettings       `json:"assist,omitempty"`
	STT                ServerSTTSettings          `json:"stt,omitempty"`
	LLM                ServerLLMSettings          `json:"llm,omitempty"`
	VoiceAgent         ServerVoiceAgentSettings   `json:"voice_agent,omitempty"`
	TTS                ServerOptionalTTSSettings  `json:"tts,omitempty"`
}

func LoadServerModelSettings ¶ added in v0.28.0

func LoadServerModelSettings(path string) (ServerModelSettings, bool, error)

func NormalizeServerModelSettings ¶ added in v0.28.0

func NormalizeServerModelSettings(settings ServerModelSettings) ServerModelSettings

func SanitizeServerModelSettings ¶ added in v0.28.0

func SanitizeServerModelSettings(settings ServerModelSettings) ServerModelSettings

type ServerOptionalTTSSettings ¶ added in v0.28.0

// ServerOptionalTTSSettings is the JSON "tts" section of
// ServerModelSettings. It holds a single optional enable flag, omitted from
// JSON when nil.
type ServerOptionalTTSSettings struct {
	Enabled *bool `json:"enabled,omitempty"`
}

type ServerProviderCredentialSettings ¶ added in v0.28.0

// ServerProviderCredentialSettings is the per-provider entry of
// ServerCredentialSettings: an optional enable flag, an env value and a
// value field.
// NOTE(review): Env presumably names an environment variable and Value
// presumably holds the credential itself in clear text — confirm, and check
// how SanitizeServerModelSettings treats Value.
type ServerProviderCredentialSettings struct {
	Enabled *bool  `json:"enabled,omitempty"`
	Env     string `json:"env,omitempty"`
	Value   string `json:"value,omitempty"`
}

type ServerSTTSettings ¶ added in v0.28.0

// ServerSTTSettings is the JSON "stt" section of ServerModelSettings: an
// optional enable flag, a URL and a model name.
type ServerSTTSettings struct {
	Enabled *bool  `json:"enabled,omitempty"`
	URL     string `json:"url,omitempty"`
	Model   string `json:"model,omitempty"`
}

type ServerVoiceAgentSettings ¶ added in v0.28.0

// ServerVoiceAgentSettings is the JSON "voice_agent" section of
// ServerModelSettings: a provider name, agent profile and sequence IDs, and
// an optional prompt template. PromptTemplate is a *string, so a nil pointer
// is omitted from JSON while a pointer to "" is still written.
type ServerVoiceAgentSettings struct {
	Provider        string  `json:"provider,omitempty"`
	AgentProfileID  string  `json:"agent_profile_id,omitempty"`
	AgentSequenceID string  `json:"agent_sequence_id,omitempty"`
	PromptTemplate  *string `json:"prompt_template,omitempty"`
}

type ShortcutLocaleConfig ¶ added in v0.18.0

// ShortcutLocaleConfig is the value type of ShortcutsConfig.Locale. It holds
// string lists for leading fillers and for the copy_last, insert_last,
// summarize and quick_note shortcuts.
// NOTE(review): presumably the spoken phrases that trigger each shortcut in
// one locale — confirm.
type ShortcutLocaleConfig struct {
	LeadingFillers []string `toml:"leading_fillers"`
	CopyLast       []string `toml:"copy_last"`
	InsertLast     []string `toml:"insert_last"`
	Summarize      []string `toml:"summarize"`
	QuickNote      []string `toml:"quick_note"`
}

type ShortcutsConfig ¶ added in v0.18.0

// ShortcutsConfig is the [shortcuts] table of Config. Locale maps a string
// key to a ShortcutLocaleConfig, i.e. [shortcuts.locale.<key>] in TOML.
// NOTE(review): the key is presumably a locale/language code — confirm.
type ShortcutsConfig struct {
	Locale map[string]ShortcutLocaleConfig `toml:"locale"`
}

type StoreConfig ¶

// StoreConfig is the [store] table of Config: the storage backend name, its
// SQLite path or Postgres DSN, and the audio retention settings. Config
// prefers it over the legacy FeedbackConfig.
type StoreConfig struct {
	Backend            string `toml:"backend"` // "sqlite" | "postgres" | registered name
	SQLitePath         string `toml:"sqlite_path"`
	PostgresDSN        string `toml:"postgres_dsn"`
	SaveAudio          bool   `toml:"save_audio"`
	AudioRetentionDays int    `toml:"audio_retention_days"`
	MaxAudioStorageMB  int    `toml:"max_audio_storage_mb"`
}

type TTSConfig ¶

// TTSConfig is the [tts] table of Config: a global enable flag, strategy,
// voice, speed and output format, plus one sub-table per backend (openai,
// google, huggingface, local).
type TTSConfig struct {
	Enabled     bool           `toml:"enabled"`
	Strategy    string         `toml:"strategy"` // "cloud-first", "local-first", "cloud-only", "local-only"
	Voice       string         `toml:"voice"`    // Global default voice override
	Speed       float64        `toml:"speed"`    // Global speed 0.25-4.0, default 1.0
	Format      string         `toml:"format"`   // "mp3", "wav", "opus", "pcm"
	OpenAI      TTSOpenAI      `toml:"openai"`
	Google      TTSGoogle      `toml:"google"`
	HuggingFace TTSHuggingFace `toml:"huggingface"`
	Local       TTSLocal       `toml:"local"`
}

TTSConfig configures text-to-speech for Assist Mode.

type TTSGoogle ¶

// TTSGoogle is the [tts.google] table: an enable flag and a voice name.
type TTSGoogle struct {
	Enabled bool   `toml:"enabled"`
	Voice   string `toml:"voice"` // e.g. "de-DE-Neural2-B"
}

type TTSHuggingFace ¶

// TTSHuggingFace is the [tts.huggingface] table: an enable flag and a model
// identifier.
type TTSHuggingFace struct {
	Enabled bool   `toml:"enabled"`
	Model   string `toml:"model"` // e.g. "parler-tts/parler-tts-mini-multilingual-v1.1"
}

type TTSLocal ¶

// TTSLocal holds the settings for the local text-to-speech provider used by
// TTSConfig.Local: an enable switch, a model name, a model path and a port.
//
// NOTE(review): ModelPath is presumably a filesystem path to the model and
// Port the TCP port of a locally running TTS server — confirm with the code
// that starts or connects to the local provider.
type TTSLocal struct {
	Enabled   bool   `toml:"enabled"`
	Model     string `toml:"model"`
	ModelPath string `toml:"model_path"`
	Port      int    `toml:"port"`
}

type TTSOpenAI ¶

// TTSOpenAI holds the OpenAI-specific text-to-speech settings used by
// TTSConfig.OpenAI: an enable switch, the model ID and the voice name.
type TTSOpenAI struct {
	Enabled bool   `toml:"enabled"`
	Model   string `toml:"model"` // "tts-1" or "tts-1-hd"
	Voice   string `toml:"voice"` // alloy, echo, fable, onyx, nova, shimmer
}

type UIConfig ¶

// UIConfig holds the user-interface settings.
//
// The Overlay* fields configure the overlay: whether it is enabled, its
// position ("top", "bottom", "left" or "right"), whether it is movable, a free
// X/Y position, and a map of OverlayFreePosition values keyed by string.
// Visualizer and Design are string selectors, and AssistOverlayMode and
// VoiceAgentOverlayMode hold the overlay mode for the respective feature.
//
// NOTE(review): OverlayFreeX/OverlayFreeY are presumably screen coordinates
// used when the overlay is movable, and OverlayMonitorPositions is presumably
// keyed by a monitor identifier — confirm. The accepted values for Visualizer,
// Design and the two overlay-mode fields are not visible here.
type UIConfig struct {
	OverlayEnabled          bool                           `toml:"overlay_enabled"`
	OverlayPosition         string                         `toml:"overlay_position"` // "top", "bottom", "left", "right"
	OverlayMovable          bool                           `toml:"overlay_movable"`
	OverlayFreeX            int                            `toml:"overlay_free_x"`
	OverlayFreeY            int                            `toml:"overlay_free_y"`
	OverlayMonitorPositions map[string]OverlayFreePosition `toml:"overlay_monitor_positions"`
	Visualizer              string                         `toml:"visualizer"`
	Design                  string                         `toml:"design"`
	AssistOverlayMode       string                         `toml:"assist_overlay_mode"`
	VoiceAgentOverlayMode   string                         `toml:"voice_agent_overlay_mode"`
}

type VPSConfig ¶

// VPSConfig holds the settings for a VPS-hosted endpoint: an enable switch,
// the endpoint URL, the model name and APIKeyEnv.
//
// NOTE(review): APIKeyEnv presumably names the environment variable that
// contains the API key rather than the key itself — confirm with the code
// that resolves secrets.
type VPSConfig struct {
	Enabled   bool   `toml:"enabled"`
	URL       string `toml:"url"`
	Model     string `toml:"model"`
	APIKeyEnv string `toml:"api_key_env"`
}

type VocabularyConfig ¶ added in v0.14.6

// VocabularyConfig holds the vocabulary settings. Dictionary is a single
// string read from the "dictionary" TOML key.
//
// NOTE(review): whether Dictionary is inline dictionary content or a path to a
// dictionary file is not visible here — confirm with the consumer.
type VocabularyConfig struct {
	Dictionary string `toml:"dictionary"`
}

type VoiceAgentConfig ¶

// VoiceAgentConfig configures the real-time Voice Agent Mode.
//
// The fields fall into these groups:
//   - backend selection: Enabled, Provider, Model, FallbackModel, Voice,
//     PipelineFallback
//   - persona and prompts: AgentProfileID, AgentSequenceID, FrameworkPrompt,
//     RefinementPrompt, and Instruction (legacy alias for FrameworkPrompt)
//   - session lifecycle: AutoStartOnLaunch, CloseBehavior,
//     ReminderAfterIdleSec, DeactivateAfterIdleSec
//   - transcripts and summary: ShowPrompter, EnableSessionSummary,
//     EnableInputTranscript, EnableOutputTranscript
//   - model behavior: EnableAffectiveDialog, ThinkingEnabled,
//     IncludeThoughts, ThinkingBudget, ThinkingLevel
//   - context compression: ContextCompressionEnabled and its trigger/target
//     token counts
//   - turn and voice-activity detection: AutomaticActivityDetection,
//     ActivityHandling, TurnCoverage and the VAD* fields
//
// NOTE(review): the accepted values for ThinkingLevel, ActivityHandling,
// TurnCoverage and the VAD sensitivity strings are not visible here — confirm
// against the provider integration before documenting them.
type VoiceAgentConfig struct {
	Enabled bool `toml:"enabled"`
	// Provider selects the backend that drives a Voice Agent session.
	// Supported values:
	//   ""          (default) — same as "gemini"
	//   "gemini"    — Google Gemini Live (cloud, GOOGLE_AI_API_KEY required)
	//   "cascaded"  — self-hosted whisper.cpp → Genkit agent LLM → TTS pipeline
	//                 (CPU-capable; no external realtime dependency)
	//   "moshi"     — self-hosted Kyutai Moshi Rust server (GPU required, M9b)
	//
	// The Server-Target reads this field via cmd/speechkit-server; the Device-
	// Target currently always uses "gemini" and ignores it.
	Provider                        string `toml:"provider"`
	Model                           string `toml:"model"`             // Real-time model ID (e.g. "gemini-3.1-flash-live-preview")
	FallbackModel                   string `toml:"fallback_model"`    // Fallback real-time model
	Voice                           string `toml:"voice"`             // Voice name for real-time model
	AgentProfileID                  string `toml:"agent_profile_id"`  // Built-in Voice Agent profile ID; "default" preserves current behavior.
	AgentSequenceID                 string `toml:"agent_sequence_id"` // Optional workflow sequence ID; empty uses the selected persona default.
	FrameworkPrompt                 string `toml:"framework_prompt"`  // Durable host/framework instruction that defines the Voice Agent behavior
	RefinementPrompt                string `toml:"refinement_prompt"` // User-specific refinement appended to the framework prompt
	Instruction                     string `toml:"instruction"`       // Legacy alias for FrameworkPrompt
	AutoStartOnLaunch               bool   `toml:"auto_start_on_launch"`
	CloseBehavior                   string `toml:"close_behavior"` // "continue" keeps the conversation window in the taskbar; "new_chat" ends the current chat on close
	ReminderAfterIdleSec            int    `toml:"reminder_after_idle_sec"`
	DeactivateAfterIdleSec          int    `toml:"deactivate_after_idle_sec"`
	PipelineFallback                bool   `toml:"pipeline_fallback"` // Use STT -> Agent LLM -> optional TTS when the selected Voice Agent profile is not native realtime.
	ShowPrompter                    bool   `toml:"show_prompter"`     // Show live transcript prompter window
	EnableSessionSummary            bool   `toml:"enable_session_summary"`
	EnableInputTranscript           bool   `toml:"enable_input_transcript"`
	EnableOutputTranscript          bool   `toml:"enable_output_transcript"`
	EnableAffectiveDialog           bool   `toml:"enable_affective_dialog"`
	ThinkingEnabled                 bool   `toml:"thinking_enabled"`
	IncludeThoughts                 bool   `toml:"include_thoughts"`
	ThinkingBudget                  int    `toml:"thinking_budget"`
	ThinkingLevel                   string `toml:"thinking_level"`
	ContextCompressionEnabled       bool   `toml:"context_compression_enabled"`
	ContextCompressionTriggerTokens int64  `toml:"context_compression_trigger_tokens"`
	ContextCompressionTargetTokens  int64  `toml:"context_compression_target_tokens"`
	AutomaticActivityDetection      bool   `toml:"automatic_activity_detection"`
	ActivityHandling                string `toml:"activity_handling"`
	TurnCoverage                    string `toml:"turn_coverage"`
	VADStartSensitivity             string `toml:"vad_start_sensitivity"`
	VADEndSensitivity               string `toml:"vad_end_sensitivity"`
	VADPrefixPaddingMs              int    `toml:"vad_prefix_padding_ms"`
	VADSilenceDurationMs            int    `toml:"vad_silence_duration_ms"`
}

VoiceAgentConfig configures the real-time Voice Agent Mode.

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL