Documentation
¶
Index ¶
- Constants
- func ApplyLocalInstallDefaults(cfg *Config, state *InstallState) bool
- func ApplyManagedDevServerDefaults(cfg *Config) bool
- func ApplyManagedIntegrationDefaults(cfg *Config) bool
- func ApplyServerAuthSettings(cfg *Config, auth ServerAuthSettings) []string
- func ApplyServerModelSettings(cfg *Config, settings ServerModelSettings) []string
- func ApplyServerModelSettingsFile(cfg *Config) ([]string, error)
- func ApplyServerRuntimeDefaults(cfg *Config) []string
- func HuggingFaceTokenEnvName(cfg *Config) string
- func HuggingFaceTokenStatus(cfg *Config) (secrets.TokenStatus, error)
- func IsFirstRun() bool
- func ManagedDevServerAvailableInBuild() bool
- func ManagedHuggingFaceAvailableInBuild() bool
- func NormalizeHotkeyBehavior(value, fallback string) string
- func NormalizeOverlayFeedbackMode(value, fallback string) string
- func NormalizeVoiceAgentCloseBehavior(value, fallback string) string
- func OverrideManagedDevServerBuildForTests(value string) func()
- func OverrideManagedHuggingFaceBuildForTests(value string) func()
- func ResolveHuggingFaceToken(cfg *Config) (string, secrets.TokenStatus, error)
- func ResolveSecret(envName string) string
- func ResolveSecretFromEnvironmentOrDoppler(envName string) string
- func Save(path string, cfg *Config) error
- func SaveInstallState(state *InstallState) error
- func SaveServerModelSettings(path string, settings ServerModelSettings) error
- func ServerSettingsPath(cfg *Config) string
- func ValidateServerProductionAuth(cfg *Config) error
- type AssistConfig
- type AudioConfig
- type Config
- type FeedbackConfig
- type GeneralConfig
- type GoogleProviderConfig
- type GroqProviderConfig
- type HuggingFaceConfig
- type InstallMode
- type InstallState
- type LocalConfig
- type LocalLLMConfig
- type ModeModelSelection
- type ModelSelectionConfig
- type OllamaProviderConfig
- type OpenAIProviderConfig
- type OpenRouterProviderConfig
- type OverlayFreePosition
- type PersonaConfig
- type ProvidersConfig
- type RoleConfig
- type RoutingConfig
- type SequenceConfig
- type SequenceStepConfig
- type ServerAssistSettings
- type ServerAuthSettings
- type ServerConfig
- type ServerConnectionConfig
- type ServerCredentialSettings
- type ServerDictationSettings
- type ServerFeaturesConfig
- type ServerLLMSettings
- type ServerModeProviderSettings
- type ServerModeSetting
- type ServerModelSettings
- type ServerOptionalTTSSettings
- type ServerProviderCredentialSettings
- type ServerSTTSettings
- type ServerVoiceAgentSettings
- type ShortcutLocaleConfig
- type ShortcutsConfig
- type StoreConfig
- type TTSConfig
- type TTSGoogle
- type TTSHuggingFace
- type TTSLocal
- type TTSOpenAI
- type UIConfig
- type VPSConfig
- type VocabularyConfig
- type VoiceAgentConfig
Constants ¶
// Package-wide string constants, declared as one group:
//   - HotkeyBehavior*: the two hotkey behaviors ("push_to_talk", "toggle").
//   - VoiceAgentCloseBehavior*: the two Voice Agent close behaviors
//     ("continue", "new_chat").
//   - OverlayFeedbackMode*: the two overlay feedback modes.
//   - DefaultLocalLLMBaseURL, DefaultLocalLLMModel, DefaultLocalSTTModel:
//     default local LLM base URL (a loopback address), LLM model and STT model.
//   - ManagedDevServerURL: URL of the managed dev server.
//   - Default*PrimaryProfileID: default primary profile ID for the dictate,
//     assist and voice agent modes.
const ( HotkeyBehaviorPushToTalk = "push_to_talk" HotkeyBehaviorToggle = "toggle" VoiceAgentCloseBehaviorContinue = "continue" VoiceAgentCloseBehaviorNewChat = "new_chat" OverlayFeedbackModeBigProductivity = "big_productivity" OverlayFeedbackModeSmallFeedback = "small_feedback" DefaultLocalLLMBaseURL = "http://127.0.0.1:8082/v1" DefaultLocalLLMModel = "ggml-org/gemma-4-E4B-it-GGUF:Q4_K_M" DefaultLocalSTTModel = "ggml-large-v3-turbo.bin" ManagedDevServerURL = "https://speechkit.kombify.dev" DefaultDictatePrimaryProfileID = "stt.local.whispercpp" DefaultAssistPrimaryProfileID = "assist.builtin.gemma4-e4b" DefaultVoiceAgentPrimaryProfileID = "realtime.builtin.pipeline" )
// Values for ModeModelSelection.ModeSource: ModeSourceLocal runs a mode
// locally (the default), ModeSourceServer routes it to a remote
// speechkit-server.
const ( ModeSourceLocal = "local" ModeSourceServer = "server" )
Mode source values for ModeModelSelection.ModeSource. "local" means the desktop app runs the mode against the in-process Framework kernel (default, preserves all pre-0.26 behaviour). "server" routes the mode through ServerConnection to a remote speechkit-server.
// Server-settings constants. The three *Env constants hold environment
// variable names; what each variable controls is not visible in this listing
// — TODO confirm against the code that reads them. The ServerAuthMode*
// constants are auth-mode identifiers ("managed_bearer", "self_managed").
// NOTE(review): these two values do not appear in the value list documented
// on ServerConfig.AuthMode; presumably they belong to ServerAuthSettings —
// confirm.
const ( ServerSettingsPathEnv = "SPEECHKIT_SERVER_SETTINGS_PATH" ServerSettingsWriteEnv = "SPEECHKIT_SERVER_SETTINGS_WRITE" ServerOnboardingUIEnv = "SPEECHKIT_SERVER_ONBOARDING_UI" ServerAuthModeManagedBearer = "managed_bearer" ServerAuthModeSelfManaged = "self_managed" )
// AllowInsecureNoAuthEnv is the name of an environment variable.
// NOTE(review): presumably the explicit opt-in that
// ValidateServerProductionAuth consults before permitting a no-auth bind —
// confirm against its implementation.
const AllowInsecureNoAuthEnv = "SPEECHKIT_ALLOW_INSECURE_NO_AUTH"
const (
// ServerSelfHostedDefaultsEnv is the name of the environment variable that
// opts a server deployment into the self-hosted defaults applied by
// ApplyServerRuntimeDefaults.
ServerSelfHostedDefaultsEnv = "SPEECHKIT_SELFHOSTED_DEFAULTS"
)
Variables ¶
This section is empty.
Functions ¶
func ApplyLocalInstallDefaults ¶ added in v0.14.1
func ApplyLocalInstallDefaults(cfg *Config, state *InstallState) bool
ApplyLocalInstallDefaults keeps a pending local install local-first while the onboarding download flow prepares the selected Whisper model.
func ApplyManagedDevServerDefaults ¶ added in v0.28.2
func ApplyServerAuthSettings ¶ added in v0.28.2
func ApplyServerAuthSettings(cfg *Config, auth ServerAuthSettings) []string
func ApplyServerModelSettings ¶ added in v0.28.0
func ApplyServerModelSettings(cfg *Config, settings ServerModelSettings) []string
func ApplyServerModelSettingsFile ¶ added in v0.28.0
func ApplyServerRuntimeDefaults ¶ added in v0.28.0
ApplyServerRuntimeDefaults turns the standalone Linux Server-Target into a working self-hosted deployment when SPEECHKIT_SELFHOSTED_DEFAULTS is set. Desktop code never calls this; it is intentionally opt-in for server containers that ship local STT/LLM sidecars.
func HuggingFaceTokenEnvName ¶
func HuggingFaceTokenStatus ¶
func HuggingFaceTokenStatus(cfg *Config) (secrets.TokenStatus, error)
func ManagedDevServerAvailableInBuild ¶ added in v0.28.2
func ManagedDevServerAvailableInBuild() bool
func ManagedHuggingFaceAvailableInBuild ¶ added in v0.14.6
func ManagedHuggingFaceAvailableInBuild() bool
func NormalizeHotkeyBehavior ¶ added in v0.21.1
func NormalizeOverlayFeedbackMode ¶ added in v0.22.4
func NormalizeVoiceAgentCloseBehavior ¶ added in v0.21.1
func OverrideManagedDevServerBuildForTests ¶ added in v0.28.2
func OverrideManagedDevServerBuildForTests(value string) func()
func OverrideManagedHuggingFaceBuildForTests ¶ added in v0.14.6
func OverrideManagedHuggingFaceBuildForTests(value string) func()
func ResolveHuggingFaceToken ¶
func ResolveHuggingFaceToken(cfg *Config) (string, secrets.TokenStatus, error)
func ResolveSecret ¶
ResolveSecret resolves a secret by name. It checks the environment first and then falls back to the Doppler CLI, using either the explicit DOPPLER_PROJECT/DOPPLER_CONFIG env vars or the build-embedded managed Doppler defaults.
func ResolveSecretFromEnvironmentOrDoppler ¶ added in v0.14.8
func SaveInstallState ¶
func SaveInstallState(state *InstallState) error
SaveInstallState writes the install state to disk.
func SaveServerModelSettings ¶ added in v0.28.0
func SaveServerModelSettings(path string, settings ServerModelSettings) error
func ServerSettingsPath ¶ added in v0.28.0
func ValidateServerProductionAuth ¶ added in v0.29.0
ValidateServerProductionAuth rejects accidental public no-auth server binds. auth_mode=none remains available for local development and explicit tests.
Types ¶
type AssistConfig ¶ added in v0.28.0
// AssistConfig is the type of Config.Assist, which is tagged as the "assist"
// TOML table.
type AssistConfig struct {
// EnabledTools is decoded from the "enabled_tools" key. How an empty or
// missing list is interpreted is not visible here — TODO confirm against the
// consumer.
EnabledTools []string `toml:"enabled_tools"`
}
type AudioConfig ¶
type Config ¶
// Config is the root configuration structure. Each field maps to the TOML
// table (or, for Personas, Roles and Sequences, the TOML array) named in its
// struct tag.
type Config struct {
General GeneralConfig `toml:"general"`
Audio AudioConfig `toml:"audio"`
UI UIConfig `toml:"ui"`
Vocabulary VocabularyConfig `toml:"vocabulary"`
Assist AssistConfig `toml:"assist"`
Shortcuts ShortcutsConfig `toml:"shortcuts"`
// ModelSelection holds the per-mode profile choice and mode source
// (local vs. server) for dictate, assist and voice agent.
ModelSelection ModelSelectionConfig `toml:"model_selection"`
// ServerConnection points the device/local-target at a remote SpeechKit
// Server-Target. Only consulted when at least one mode in ModelSelection
// has mode_source = "server". Disabled by default; the desktop app runs
// fully self-contained until a user opts a mode into server-side
// execution (typically via onboarding or settings).
ServerConnection ServerConnectionConfig `toml:"server_connection"`
Local LocalConfig `toml:"local"`
LocalLLM LocalLLMConfig `toml:"local_llm"`
VPS VPSConfig `toml:"vps"`
HuggingFace HuggingFaceConfig `toml:"huggingface"`
Routing RoutingConfig `toml:"routing"`
Feedback FeedbackConfig `toml:"feedback"` // legacy compat; prefer Store
Store StoreConfig `toml:"store"`
Providers ProvidersConfig `toml:"providers"`
TTS TTSConfig `toml:"tts"`
VoiceAgent VoiceAgentConfig `toml:"voice_agent"`
// Server configures the standalone Linux server binary (cmd/speechkit-server).
// All fields are optional; the desktop app (cmd/speechkit) ignores them entirely.
Server ServerConfig `toml:"server"`
// Personas, Roles and Sequences are the TOML-seeded Voice Agent personas,
// roles and multi-step workflows.
Personas []PersonaConfig `toml:"personas"`
Roles []RoleConfig `toml:"roles"`
Sequences []SequenceConfig `toml:"sequences"`
}
func (*Config) LegacyAgentHotkey ¶ added in v0.19.0
type FeedbackConfig ¶
type GeneralConfig ¶
// GeneralConfig is the type of Config.General (TOML table "general"). It holds
// the language setting, the per-mode hotkeys and hotkey behaviors, per-mode
// enable flags, silence thresholds and the model download directory, plus
// several fields kept only for compatibility with older config files.
type GeneralConfig struct {
Language string `toml:"language"`
Hotkey string `toml:"hotkey"` // Deprecated: legacy single-hotkey field kept for config file compat. Use DictateHotkey.
DictateHotkey string `toml:"dictate_hotkey"`
AssistHotkey string `toml:"assist_hotkey"`
VoiceAgentHotkey string `toml:"voice_agent_hotkey"`
// The three *HotkeyBehavior fields are plain strings; presumably each holds
// HotkeyBehaviorPushToTalk or HotkeyBehaviorToggle after
// NormalizeHotkeyBehavior — TODO confirm.
DictateHotkeyBehavior string `toml:"dictate_hotkey_behavior"`
AssistHotkeyBehavior string `toml:"assist_hotkey_behavior"`
VoiceAgentHotkeyBehavior string `toml:"voice_agent_hotkey_behavior"`
DictateEnabled bool `toml:"dictate_enabled"`
AssistEnabled bool `toml:"assist_enabled"`
VoiceAgentEnabled bool `toml:"voice_agent_enabled"`
AutoStartOnLaunch bool `toml:"auto_start_on_launch"`
// NOTE(review): the LegacyAgentHotkey accessors suggest AgentHotkey is a
// legacy field superseded by AssistHotkey/VoiceAgentHotkey — confirm.
AgentHotkey string `toml:"agent_hotkey"`
AgentMode string `toml:"agent_mode"` // "assist" or "voice_agent" — determines what agent_hotkey triggers
ActiveMode string `toml:"active_mode"` // legacy compat
HotkeyMode string `toml:"hotkey_mode"` // legacy compat for single behavior setting
// Both silence fields are in milliseconds, per their names and TOML keys.
AutoStopSilenceMs int `toml:"auto_stop_silence_ms"`
FastModeSilenceMs int `toml:"fast_mode_silence_ms"` // silence threshold for Quick Capture auto-stop
ModelDownloadDir string `toml:"model_download_dir"` // Default directory for downloaded local model files
}
func (GeneralConfig) LegacyAgentHotkey ¶ added in v0.19.0
func (g GeneralConfig) LegacyAgentHotkey() string
type GoogleProviderConfig ¶
type GroqProviderConfig ¶
type HuggingFaceConfig ¶
type InstallMode ¶
// InstallMode is a string type naming how SpeechKit is installed. Its defined
// values are InstallModeLocal, InstallModeCloud and InstallModeNotSet.
type InstallMode string
InstallMode defines whether SpeechKit runs locally or connected to an external host.
// InstallMode values: "local", "cloud", and the empty string, which stands
// for a mode that has not been set.
const ( InstallModeLocal InstallMode = "local" InstallModeCloud InstallMode = "cloud" InstallModeNotSet InstallMode = "" )
type InstallState ¶
// InstallState records the chosen install mode, whether setup has finished,
// and the device identifier. It is decoded from and encoded to TOML using the
// keys in the field tags.
type InstallState struct {
// Mode is one of InstallModeLocal, InstallModeCloud or InstallModeNotSet
// (the empty string).
Mode InstallMode `toml:"mode"`
SetupDone bool `toml:"setup_done"`
DeviceID string `toml:"device_id"`
}
InstallState persists the user's install mode choice and device identity. Stored in %APPDATA%/SpeechKit/install.toml, separate from config.toml.
func LoadInstallState ¶
func LoadInstallState() (*InstallState, error)
LoadInstallState reads the install state from disk. It returns a default state (with an empty mode) if the file doesn't exist.
type LocalConfig ¶
type LocalLLMConfig ¶ added in v0.22.1
// LocalLLMConfig is the type of Config.LocalLLM (TOML table "local_llm").
type LocalLLMConfig struct {
Enabled bool `toml:"enabled"`
// NOTE(review): BaseURL and Model presumably fall back to
// DefaultLocalLLMBaseURL and DefaultLocalLLMModel when empty — the defaulting
// code is not visible here, confirm.
BaseURL string `toml:"base_url"`
Model string `toml:"model"`
ModelPath string `toml:"model_path"`
Port int `toml:"port"`
GPU string `toml:"gpu"`
// UtilityModel, AssistModel and AgentModel use the same three field names and
// TOML keys as OpenAIProviderConfig and ServerLLMSettings.
UtilityModel string `toml:"utility_model"`
AssistModel string `toml:"assist_model"`
AgentModel string `toml:"agent_model"`
}
type ModeModelSelection ¶ added in v0.21.1
// ModeModelSelection holds the primary and fallback profile IDs and the
// execution source for a single mode. ModelSelectionConfig contains one
// instance per mode.
type ModeModelSelection struct {
// PrimaryProfileID and FallbackProfileID are profile identifiers; the
// Default*PrimaryProfileID constants (e.g. "stt.local.whispercpp") show the
// ID format.
PrimaryProfileID string `toml:"primary_profile_id"`
FallbackProfileID string `toml:"fallback_profile_id"`
// ModeSource selects whether this mode runs locally (Framework kernel
// in-process, default) or against a remote SpeechKit Server-Target
// configured under [server_connection]. Empty string is treated as
// ModeSourceLocal so existing configs keep behaving as before.
// Read it through ResolvedModeSource rather than directly.
ModeSource string `toml:"mode_source"`
}
func (ModeModelSelection) ResolvedModeSource ¶ added in v0.26.0
func (sel ModeModelSelection) ResolvedModeSource() string
ResolvedModeSource returns the effective ModeSource for this mode, normalising the empty default to ModeSourceLocal. Use this everywhere instead of reading sel.ModeSource directly so a missing TOML field does not silently mean "server".
type ModelSelectionConfig ¶ added in v0.21.1
// ModelSelectionConfig is the type of Config.ModelSelection (TOML table
// "model_selection"). It groups one ModeModelSelection per mode under the
// sub-tables "dictate", "assist" and "voice_agent".
type ModelSelectionConfig struct {
Dictate ModeModelSelection `toml:"dictate"`
Assist ModeModelSelection `toml:"assist"`
VoiceAgent ModeModelSelection `toml:"voice_agent"`
}
func BuiltInPrimaryModelSelectionDefaults ¶ added in v0.22.4
func BuiltInPrimaryModelSelectionDefaults() ModelSelectionConfig
type OllamaProviderConfig ¶
type OpenAIProviderConfig ¶
// OpenAIProviderConfig is the type of ProvidersConfig.OpenAI (TOML table
// "openai" inside the providers section). Apart from Enabled and APIKeyEnv,
// every field is a string naming a model or voice for one capability.
type OpenAIProviderConfig struct {
Enabled bool `toml:"enabled"`
// NOTE(review): presumably the name of the environment variable that holds
// the API key, not the key itself — confirm against the resolver.
APIKeyEnv string `toml:"api_key_env"`
STTModel string `toml:"stt_model"`
UtilityModel string `toml:"utility_model"`
AssistModel string `toml:"assist_model"`
AgentModel string `toml:"agent_model"`
TTSModel string `toml:"tts_model"`
TTSVoice string `toml:"tts_voice"`
RealtimeModel string `toml:"realtime_model"`
}
type OpenRouterProviderConfig ¶ added in v0.15.0
type OverlayFreePosition ¶ added in v0.19.0
type PersonaConfig ¶ added in v0.26.0
// PersonaConfig describes one Voice Agent persona as an element of
// Config.Personas (TOML array "personas").
type PersonaConfig struct {
ID string `toml:"id"`
DisplayName string `toml:"display_name"`
Description string `toml:"description"`
Voice string `toml:"voice"`
Locale string `toml:"locale"`
// NOTE(review): DefaultRole and DefaultSequence presumably hold a
// RoleConfig.ID and a SequenceConfig.ID respectively — confirm.
DefaultRole string `toml:"default_role"`
DefaultSequence string `toml:"default_sequence"`
Tags []string `toml:"tags"`
Metadata map[string]string `toml:"metadata"`
}
PersonaConfig is a TOML-seeded Voice Agent persona. DB entries with the same ID override the TOML seed at runtime.
type ProvidersConfig ¶
// ProvidersConfig is the type of Config.Providers (TOML table "providers"),
// with one sub-table per provider: openai, groq, google, ollama, openrouter.
type ProvidersConfig struct {
OpenAI OpenAIProviderConfig `toml:"openai"`
Groq GroqProviderConfig `toml:"groq"`
Google GoogleProviderConfig `toml:"google"`
Ollama OllamaProviderConfig `toml:"ollama"`
OpenRouter OpenRouterProviderConfig `toml:"openrouter"`
}
ProvidersConfig groups all external provider configurations.
type RoleConfig ¶ added in v0.26.0
// RoleConfig describes one Voice Agent role as an element of Config.Roles
// (TOML array "roles"). Besides identity and prompt fields it carries
// thinking, voice-activity-detection (VAD), context-compression and
// affective-dialog settings whose TOML keys match the same-purpose fields on
// VoiceAgentConfig.
type RoleConfig struct {
ID string `toml:"id"`
DisplayName string `toml:"display_name"`
SystemPrompt string `toml:"system_prompt"`
RefinementPrompt string `toml:"refinement_prompt"`
Locale string `toml:"locale"`
VocabularyHint string `toml:"vocabulary_hint"`
ToolAllowlist []string `toml:"tool_allowlist"`
Temperature float64 `toml:"temperature"`
ThinkingEnabled bool `toml:"thinking_enabled"`
ThinkingLevel string `toml:"thinking_level"`
IncludeThoughts bool `toml:"include_thoughts"`
ThinkingBudget int `toml:"thinking_budget"`
AutomaticActivityDetection bool `toml:"automatic_activity_detection"`
VADStartSensitivity string `toml:"vad_start_sensitivity"`
VADEndSensitivity string `toml:"vad_end_sensitivity"`
// Both VAD durations are in milliseconds, per their names and TOML keys.
VADPrefixPaddingMs int `toml:"vad_prefix_padding_ms"`
VADSilenceDurationMs int `toml:"vad_silence_duration_ms"`
ActivityHandling string `toml:"activity_handling"`
TurnCoverage string `toml:"turn_coverage"`
ContextCompressionEnabled bool `toml:"context_compression_enabled"`
// The Go names below abbreviate "Tokens" to "Tk", but the TOML keys are the
// full "..._tokens" keys, identical to those on VoiceAgentConfig.
ContextCompressionTriggerTk int64 `toml:"context_compression_trigger_tokens"`
ContextCompressionTargetTk int64 `toml:"context_compression_target_tokens"`
EnableAffectiveDialog bool `toml:"enable_affective_dialog"`
}
RoleConfig is a TOML-seeded Voice Agent role. Roles are referenced from Personas via ID and compose the LiveConfig prompt layers.
type RoutingConfig ¶
type SequenceConfig ¶ added in v0.26.0
// SequenceConfig describes one Voice Agent workflow as an element of
// Config.Sequences (TOML array "sequences"): identity fields, a completion
// rule, and an ordered list of steps.
type SequenceConfig struct {
ID string `toml:"id"`
DisplayName string `toml:"display_name"`
Description string `toml:"description"`
Completion string `toml:"completion"` // "all_steps" | "explicit_close" | "max_turns"
// NOTE(review): MaxTurns presumably only matters when Completion is
// "max_turns" — confirm against the sequence runner.
MaxTurns int `toml:"max_turns"`
Steps []SequenceStepConfig `toml:"steps"`
}
SequenceConfig is a TOML-seeded multi-step Voice Agent workflow.
type SequenceStepConfig ¶ added in v0.26.0
// SequenceStepConfig is the element type of SequenceConfig.Steps. Each step
// has its own ID, instruction text, exit criteria, required tools and turn
// limit.
type SequenceStepConfig struct {
ID string `toml:"id"`
Instruction string `toml:"instruction"`
ExitCriteria string `toml:"exit_criteria"`
RequireTools []string `toml:"require_tools"`
// MaxTurns is a per-step value, separate from SequenceConfig.MaxTurns.
MaxTurns int `toml:"max_turns"`
}
SequenceStepConfig is a single step inside a SequenceConfig.
type ServerAssistSettings ¶ added in v0.28.0
// ServerAssistSettings is the type of ServerModelSettings.Assist (JSON key
// "assist"). Its single field uses the same key name as
// AssistConfig.EnabledTools.
type ServerAssistSettings struct {
// With omitempty, a nil or empty list is left out of the encoded JSON.
EnabledTools []string `json:"enabled_tools,omitempty"`
}
type ServerAuthSettings ¶ added in v0.28.2
type ServerConfig ¶ added in v0.26.0
// ServerConfig is the type of Config.Server (TOML table "server"). It covers
// the listen address and public URL, enabled modes, authentication, CORS,
// rate and size limits, Voice Agent session limits, whisper.cpp and model
// paths, logging, and feature flags.
type ServerConfig struct {
ListenAddr string `toml:"listen_addr"` // e.g. ":8080"
PublicURL string `toml:"public_url"` // external API base URL, e.g. https://speechkit.example.com/api
Modes []string `toml:"modes"` // subset of ["dictation","assist","voiceagent"]; empty = all
// auth_mode "none" is intended for local development and explicit tests;
// ValidateServerProductionAuth rejects accidental public no-auth binds.
AuthMode string `toml:"auth_mode"` // "none" | "bearer" | "edge_hmac" | "bearer_or_edge"
BearerTokenEnv string `toml:"bearer_token_env"` // env var name holding the bearer token
BearerRole string `toml:"bearer_role"` // optional role for static bearer callers, e.g. "admin"
EdgeAuthSecretEnv string `toml:"edge_auth_secret_env"` // env var name holding the HMAC secret
CORSAllowedOrigins []string `toml:"cors_allowed_origins"`
RateLimitRPS float64 `toml:"rate_limit_rps"`
RateLimitBurst int `toml:"rate_limit_burst"`
MaxUploadMB int `toml:"max_upload_mb"`
MaxVoiceAgentSessions int `toml:"max_voiceagent_sessions"` // global cap
MaxSessionsPerUser int `toml:"max_sessions_per_user"`
TicketTTLSec int `toml:"ticket_ttl_sec"` // Voice Agent WS ticket TTL
// VoiceAgentIdleTimeoutSec terminates a Voice Agent WebSocket session
// after N seconds without any client- or provider-side activity.
// Defaults to 900 (15 min). Set to 0 to disable the server-side idle
// timeout (kernel-level idle handling stays in effect either way).
VoiceAgentIdleTimeoutSec int `toml:"voiceagent_idle_timeout_sec"`
WhisperBinary string `toml:"whisper_binary"` // absolute path inside container
WhisperPort int `toml:"whisper_port"` // loopback port for whisper.cpp server
ModelDir string `toml:"model_dir"` // persistent volume, e.g. /var/lib/speechkit/models
LogFormat string `toml:"log_format"` // "json" | "text"
LogLevel string `toml:"log_level"` // "debug" | "info" | "warn" | "error"
Features ServerFeaturesConfig `toml:"features"`
}
ServerConfig configures the standalone Linux server binary. Used only by cmd/speechkit-server; the desktop app never reads these values.
type ServerConnectionConfig ¶ added in v0.26.0
// ServerConnectionConfig is the type of Config.ServerConnection (TOML table
// "server_connection"): the client-side settings for reaching a remote
// speechkit-server.
type ServerConnectionConfig struct {
// Enabled gates the entire server connection. When false, every mode is
// forced to run locally regardless of its mode_source. Lets users keep
// their server URL in config but temporarily flip back to fully local.
Enabled bool `toml:"enabled"`
// URL is the base URL of the speechkit-server, e.g.
// "https://speechkit.example.com" or "http://localhost:8080".
URL string `toml:"url"`
// BearerTokenEnv names the env var that holds the bearer token sent in
// the Authorization header. Defaults to SPEECHKIT_SERVER_TOKEN. The
// value is never read from the TOML file itself — only the env var name
// is configured here.
BearerTokenEnv string `toml:"bearer_token_env"`
// FallbackToLocal makes the device app fall back to the in-process
// Framework kernel if a server call fails or the server is unreachable.
// Useful for laptop deployments that may be offline; should be false
// for kiosks that must never silently downgrade to local processing.
FallbackToLocal bool `toml:"fallback_to_local"`
// RequestTimeoutSec caps non-streaming HTTP calls (Dictation, Assist).
// 0 means no explicit timeout (the underlying http.Client default
// applies). Voice Agent WebSocket sessions are not affected.
// NOTE(review): a net/http Client whose Timeout is zero applies no timeout
// at all, so 0 may mean "unbounded" unless the client is configured
// elsewhere — confirm.
RequestTimeoutSec int `toml:"request_timeout_sec"`
}
ServerConnectionConfig describes how the device/local-target reaches a remote SpeechKit server. Read by cmd/speechkit (and any embedded library caller) when a ModeModelSelection has mode_source = "server"; the Server-Target itself ignores this section.
type ServerCredentialSettings ¶ added in v0.28.0
// ServerCredentialSettings is the type of ServerModelSettings.Credentials
// (JSON key "credentials"), with one entry per provider: openai, groq,
// google, huggingface, openrouter.
// NOTE(review): if ServerProviderCredentialSettings is a struct, omitempty
// has no effect on these fields under encoding/json — confirm.
type ServerCredentialSettings struct {
OpenAI ServerProviderCredentialSettings `json:"openai,omitempty"`
Groq ServerProviderCredentialSettings `json:"groq,omitempty"`
Google ServerProviderCredentialSettings `json:"google,omitempty"`
HuggingFace ServerProviderCredentialSettings `json:"huggingface,omitempty"`
OpenRouter ServerProviderCredentialSettings `json:"openrouter,omitempty"`
}
type ServerDictationSettings ¶ added in v0.28.0
// ServerDictationSettings is the type of ServerModelSettings.Dictation (JSON
// key "dictation").
type ServerDictationSettings struct {
// Dictionary is a pointer, so a nil value (omitted from JSON by omitempty)
// is distinguishable from a pointer to the empty string (encoded as "").
// NOTE(review): presumably nil means "leave the configured dictionary
// unchanged" — confirm against ApplyServerModelSettings.
Dictionary *string `json:"dictionary,omitempty"`
}
type ServerFeaturesConfig ¶ added in v0.30.0
type ServerLLMSettings ¶ added in v0.28.0
// ServerLLMSettings is the type of ServerModelSettings.LLM (JSON key "llm").
// Every field is left out of the encoded JSON when it is nil or empty.
type ServerLLMSettings struct {
// Enabled is a pointer, so nil (unset) is distinguishable from an explicit
// false.
Enabled *bool `json:"enabled,omitempty"`
BaseURL string `json:"base_url,omitempty"`
UtilityModel string `json:"utility_model,omitempty"`
AssistModel string `json:"assist_model,omitempty"`
AgentModel string `json:"agent_model,omitempty"`
// NOTE(review): presumably a Hugging Face repository identifier — confirm.
HFRepo string `json:"hf_repo,omitempty"`
}
type ServerModeProviderSettings ¶ added in v0.28.0
// ServerModeProviderSettings is the type of ServerModelSettings.Modes (JSON
// key "modes"), with one ServerModeSetting per mode.
// The JSON key for the first mode is "dictation", whereas the TOML key on
// ModelSelectionConfig is "dictate".
type ServerModeProviderSettings struct {
Dictation ServerModeSetting `json:"dictation,omitempty"`
Assist ServerModeSetting `json:"assist,omitempty"`
VoiceAgent ServerModeSetting `json:"voice_agent,omitempty"`
}
type ServerModeSetting ¶ added in v0.28.0
type ServerModelSettings ¶ added in v0.28.0
// ServerModelSettings is the JSON document handled by
// LoadServerModelSettings, SaveServerModelSettings,
// NormalizeServerModelSettings, SanitizeServerModelSettings and
// ApplyServerModelSettings.
// NOTE(review): if this is marshalled with encoding/json, omitempty never
// omits a struct-valued field, so the nested settings structs below are
// always written even when all of their own fields are empty.
type ServerModelSettings struct {
Version int `json:"version,omitempty"`
OnboardingComplete bool `json:"onboarding_complete,omitempty"`
OnboardingVersion string `json:"onboarding_version,omitempty"`
ServerAuth ServerAuthSettings `json:"server_auth,omitempty"`
Modes ServerModeProviderSettings `json:"modes,omitempty"`
Credentials ServerCredentialSettings `json:"credentials,omitempty"`
Dictation ServerDictationSettings `json:"dictation,omitempty"`
Assist ServerAssistSettings `json:"assist,omitempty"`
STT ServerSTTSettings `json:"stt,omitempty"`
LLM ServerLLMSettings `json:"llm,omitempty"`
VoiceAgent ServerVoiceAgentSettings `json:"voice_agent,omitempty"`
TTS ServerOptionalTTSSettings `json:"tts,omitempty"`
}
func LoadServerModelSettings ¶ added in v0.28.0
func LoadServerModelSettings(path string) (ServerModelSettings, bool, error)
func NormalizeServerModelSettings ¶ added in v0.28.0
func NormalizeServerModelSettings(settings ServerModelSettings) ServerModelSettings
func SanitizeServerModelSettings ¶ added in v0.28.0
func SanitizeServerModelSettings(settings ServerModelSettings) ServerModelSettings
type ServerOptionalTTSSettings ¶ added in v0.28.0
// ServerOptionalTTSSettings is the type of ServerModelSettings.TTS (JSON key
// "tts").
type ServerOptionalTTSSettings struct {
// Enabled is a pointer, so nil (unset, omitted from JSON) is distinguishable
// from an explicit false.
Enabled *bool `json:"enabled,omitempty"`
}
type ServerProviderCredentialSettings ¶ added in v0.28.0
type ServerSTTSettings ¶ added in v0.28.0
type ServerVoiceAgentSettings ¶ added in v0.28.0
type ShortcutLocaleConfig ¶ added in v0.18.0
type ShortcutsConfig ¶ added in v0.18.0
// ShortcutsConfig is the type of Config.Shortcuts (TOML table "shortcuts").
type ShortcutsConfig struct {
// Locale maps a string key to a ShortcutLocaleConfig.
// NOTE(review): the key is presumably a locale/language code — confirm.
Locale map[string]ShortcutLocaleConfig `toml:"locale"`
}
type StoreConfig ¶
// StoreConfig is the type of Config.Store (TOML table "store"). Config marks
// its Feedback field as legacy and says to prefer Store.
type StoreConfig struct {
Backend string `toml:"backend"` // "sqlite" | "postgres" | registered name
// NOTE(review): SQLitePath and PostgresDSN presumably apply only to the
// backend they are named after — confirm.
SQLitePath string `toml:"sqlite_path"`
PostgresDSN string `toml:"postgres_dsn"`
SaveAudio bool `toml:"save_audio"`
// Retention is expressed in days and the storage cap in megabytes, per the
// field names and TOML keys.
AudioRetentionDays int `toml:"audio_retention_days"`
MaxAudioStorageMB int `toml:"max_audio_storage_mb"`
}
type TTSConfig ¶
// TTSConfig is the type of Config.TTS (TOML table "tts"): global
// text-to-speech options followed by one sub-table per backend (openai,
// google, huggingface, local).
type TTSConfig struct {
Enabled bool `toml:"enabled"`
Strategy string `toml:"strategy"` // "cloud-first", "local-first", "cloud-only", "local-only"
Voice string `toml:"voice"` // Global default voice override
Speed float64 `toml:"speed"` // Global speed 0.25-4.0, default 1.0
Format string `toml:"format"` // "mp3", "wav", "opus", "pcm"
OpenAI TTSOpenAI `toml:"openai"`
Google TTSGoogle `toml:"google"`
HuggingFace TTSHuggingFace `toml:"huggingface"`
Local TTSLocal `toml:"local"`
}
TTSConfig configures text-to-speech for Assist Mode.
type TTSHuggingFace ¶
type UIConfig ¶
// UIConfig is the type of Config.UI (TOML table "ui"). Most fields concern
// the overlay: whether it is enabled, where it is placed, and which overlay
// mode the assist and voice agent modes use.
type UIConfig struct {
OverlayEnabled bool `toml:"overlay_enabled"`
OverlayPosition string `toml:"overlay_position"` // "top", "bottom", "left", "right"
OverlayMovable bool `toml:"overlay_movable"`
OverlayFreeX int `toml:"overlay_free_x"`
OverlayFreeY int `toml:"overlay_free_y"`
// NOTE(review): the map key is presumably a monitor identifier — confirm.
OverlayMonitorPositions map[string]OverlayFreePosition `toml:"overlay_monitor_positions"`
Visualizer string `toml:"visualizer"`
Design string `toml:"design"`
// NOTE(review): the two overlay-mode fields presumably hold
// OverlayFeedbackModeBigProductivity or OverlayFeedbackModeSmallFeedback,
// as normalised by NormalizeOverlayFeedbackMode — confirm.
AssistOverlayMode string `toml:"assist_overlay_mode"`
VoiceAgentOverlayMode string `toml:"voice_agent_overlay_mode"`
}
type VocabularyConfig ¶ added in v0.14.6
// VocabularyConfig is the type of Config.Vocabulary (TOML table
// "vocabulary").
type VocabularyConfig struct {
// Dictionary is a single string; its internal format is not visible here —
// TODO confirm against the consumer.
Dictionary string `toml:"dictionary"`
}
type VoiceAgentConfig ¶
// VoiceAgentConfig is the type of Config.VoiceAgent (TOML table
// "voice_agent"). It selects the provider, model and voice, holds the prompt
// layers, and carries window, idle, transcript, thinking,
// context-compression and voice-activity-detection (VAD) settings.
type VoiceAgentConfig struct {
Enabled bool `toml:"enabled"`
// Provider selects the backend that drives a Voice Agent session.
// Supported values:
// "" (default) — same as "gemini"
// "gemini" — Google Gemini Live (cloud, GOOGLE_AI_API_KEY required)
// "cascaded" — self-hosted whisper.cpp → Genkit agent LLM → TTS pipeline
// (CPU-capable; no external realtime dependency)
// "moshi" — self-hosted Kyutai Moshi Rust server (GPU required, M9b)
//
// The Server-Target reads this field via cmd/speechkit-server; the Device-
// Target currently always uses "gemini" and ignores it.
Provider string `toml:"provider"`
Model string `toml:"model"` // Real-time model ID (e.g. "gemini-3.1-flash-live-preview")
FallbackModel string `toml:"fallback_model"` // Fallback real-time model
Voice string `toml:"voice"` // Voice name for real-time model
AgentProfileID string `toml:"agent_profile_id"` // Built-in Voice Agent profile ID; "default" preserves current behavior.
AgentSequenceID string `toml:"agent_sequence_id"` // Optional workflow sequence ID; empty uses the selected persona default.
FrameworkPrompt string `toml:"framework_prompt"` // Durable host/framework instruction that defines the Voice Agent behavior
RefinementPrompt string `toml:"refinement_prompt"` // User-specific refinement appended to the framework prompt
Instruction string `toml:"instruction"` // Legacy alias for FrameworkPrompt
AutoStartOnLaunch bool `toml:"auto_start_on_launch"`
// The two documented values match the constants
// VoiceAgentCloseBehaviorContinue and VoiceAgentCloseBehaviorNewChat.
CloseBehavior string `toml:"close_behavior"` // "continue" keeps the conversation window in the taskbar; "new_chat" ends the current chat on close
// Both idle settings are in seconds, per their names and TOML keys.
ReminderAfterIdleSec int `toml:"reminder_after_idle_sec"`
DeactivateAfterIdleSec int `toml:"deactivate_after_idle_sec"`
PipelineFallback bool `toml:"pipeline_fallback"` // Use STT -> Agent LLM -> optional TTS when the selected Voice Agent profile is not native realtime.
ShowPrompter bool `toml:"show_prompter"` // Show live transcript prompter window
EnableSessionSummary bool `toml:"enable_session_summary"`
EnableInputTranscript bool `toml:"enable_input_transcript"`
EnableOutputTranscript bool `toml:"enable_output_transcript"`
EnableAffectiveDialog bool `toml:"enable_affective_dialog"`
ThinkingEnabled bool `toml:"thinking_enabled"`
IncludeThoughts bool `toml:"include_thoughts"`
ThinkingBudget int `toml:"thinking_budget"`
ThinkingLevel string `toml:"thinking_level"`
ContextCompressionEnabled bool `toml:"context_compression_enabled"`
ContextCompressionTriggerTokens int64 `toml:"context_compression_trigger_tokens"`
ContextCompressionTargetTokens int64 `toml:"context_compression_target_tokens"`
AutomaticActivityDetection bool `toml:"automatic_activity_detection"`
ActivityHandling string `toml:"activity_handling"`
TurnCoverage string `toml:"turn_coverage"`
VADStartSensitivity string `toml:"vad_start_sensitivity"`
VADEndSensitivity string `toml:"vad_end_sensitivity"`
// Both VAD durations are in milliseconds, per their names and TOML keys.
VADPrefixPaddingMs int `toml:"vad_prefix_padding_ms"`
VADSilenceDurationMs int `toml:"vad_silence_duration_ms"`
}
VoiceAgentConfig configures the real-time Voice Agent Mode.