Documentation
¶
Index ¶
- Constants
- Variables
- func CurrentSTTAuthorizationState() (status int, authErr error, initialized bool)
- func GetTCCAppName() string
- func NewWindowsNativeASR() stt.Provider
- func RequestSTTAuthorization() (int, error)
- func RunMainRunLoop()
- func StopMainRunLoop()
- func SubmitToMainThread(fn func())
- type ASRConfig
- type ASRStatus
- type ComponentDownloadStatus
- type Config
- type DownloadRequest
- type DownloadStatus
- type EspeakLanguagePack
- type EspeakManager
- func (em *EspeakManager) GetDataSize() int64
- func (em *EspeakManager) GetLibraryPath() string
- func (em *EspeakManager) GetPlatformInfo() (string, string)
- func (em *EspeakManager) IsLibraryInstalled() bool
- func (em *EspeakManager) LanguageCount() int
- func (em *EspeakManager) ListLanguagePacks() []*EspeakLanguagePack
- func (em *EspeakManager) Refresh()
- type EspeakStatus
- type Handler
- func (h *Handler) CancelASRDownload(c echo.Context) error
- func (h *Handler) CancelKokoroDownload(c echo.Context) error
- func (h *Handler) CancelVocoderDownload(c echo.Context) error
- func (h *Handler) DownloadASRModel(c echo.Context) error
- func (h *Handler) DownloadKokoro(c echo.Context) error
- func (h *Handler) DownloadVocoder(c echo.Context) error
- func (h *Handler) GetEspeakManager() *EspeakManager
- func (h *Handler) GetOfflineLanguages(c echo.Context) error
- func (h *Handler) GetPersistedTTSProvider() string
- func (h *Handler) GetStatus(c echo.Context) error
- func (h *Handler) GetTTSConfig(c echo.Context) error
- func (h *Handler) Init(c echo.Context) error
- func (h *Handler) ListEspeakLanguages(c echo.Context) error
- func (h *Handler) RegisterRoutes(g *echo.Group)
- func (h *Handler) RestoreEditBeforeSend()
- func (h *Handler) Service() Service
- func (h *Handler) SetASROnDevice(c echo.Context) error
- func (h *Handler) SetEditBeforeSend(c echo.Context) error
- func (h *Handler) SetTTSConfig(c echo.Context) error
- func (h *Handler) SwitchASRModel(c echo.Context) error
- func (h *Handler) SwitchTTSProvider(c echo.Context) error
- func (h *Handler) Transcribe(c echo.Context) error
- type InitConfig
- type MacOSNativeSTT
- func (p *MacOSNativeSTT) Available() bool
- func (p *MacOSNativeSTT) Close()
- func (p *MacOSNativeSTT) DictationAvailable() bool
- func (p *MacOSNativeSTT) Initialize() error
- func (p *MacOSNativeSTT) MaxDuration() time.Duration
- func (p *MacOSNativeSTT) Name() string
- func (p *MacOSNativeSTT) OfflineDictationLanguages() []string
- func (p *MacOSNativeSTT) RequireOnDevice() bool
- func (p *MacOSNativeSTT) SetRequireOnDevice(_ bool)
- func (p *MacOSNativeSTT) SupportedFormats() []stt.AudioFormat
- func (p *MacOSNativeSTT) SupportsOnDevice() bool
- func (p *MacOSNativeSTT) Transcribe(_ context.Context, _ *stt.TranscribeRequest) (*stt.TranscribeResponse, error)
- func (p *MacOSNativeSTT) TranscribeStream(_ context.Context, _ *stt.TranscribeRequest, _ stt.StreamCallback) error
- func (p *MacOSNativeSTT) Type() stt.ProviderType
- type ModelInfo
- type ModelsResponse
- type OnDeviceUnavailableError
- type Progress
- type Service
- type SpeechError
- type StatusResponse
- type SwitchProviderRequest
- type SwitchRequest
- type TTSConfig
- type TTSConfigRequest
- type TTSStatus
- type TranscriptionResult
Constants ¶
// ProviderMacOSNative is the stt.ProviderType identifier "macos-native".
// NOTE(review): presumably the value reported by MacOSNativeSTT.Type — confirm.
const ProviderMacOSNative stt.ProviderType = "macos-native"
Variables ¶
var (
	// ErrNoASRProvider is the shared *SpeechError value with code
	// "NO_ASR_PROVIDER". It is a pointer, so an == comparison only matches
	// this exact value.
	ErrNoASRProvider = &SpeechError{
		Code:    "NO_ASR_PROVIDER",
		Message: "no ASR provider available",
	}
)
Error definitions
Functions ¶
func CurrentSTTAuthorizationState ¶
CurrentSTTAuthorizationState reports that macOS STT is unavailable on non-darwin builds.
func NewWindowsNativeASR ¶
func RequestSTTAuthorization ¶
RequestSTTAuthorization is a no-op on non-macOS.
func SubmitToMainThread ¶
func SubmitToMainThread(fn func())
SubmitToMainThread performs no main-thread dispatch on non-macOS builds; it simply runs fn inline.
Types ¶
type ASRConfig ¶
// ASRConfig holds ASR-specific configuration.
//
// Each field is serialized under the same key in JSON and YAML. Every tag
// carries exactly one json and one yaml entry: a repeated key is redundant
// because reflect.StructTag.Get only ever returns the first match.
//
// MaxDuration is a time.Duration; encoding/json writes it as an integer
// number of nanoseconds.
type ASRConfig struct {
	Enabled        bool          `json:"enabled" yaml:"enabled"`
	Provider       string        `json:"provider" yaml:"provider"`
	Model          string        `json:"model" yaml:"model"`
	ModelDir       string        `json:"model_dir" yaml:"model_dir"`
	DefaultLang    string        `json:"default_language" yaml:"default_language"`
	EditBeforeSend bool          `json:"edit_before_send" yaml:"edit_before_send"`
	MaxDuration    time.Duration `json:"max_duration" yaml:"max_duration"`
}
ASRConfig holds ASR-specific configuration.
type ASRStatus ¶
// ASRStatus is the ASR portion of the speech status payload; StatusResponse
// exposes it under the "asr" JSON key.
//
// Fields tagged omitempty are left out of the JSON when they hold their zero
// value, so for the boolean ones (PermissionDenied, OnDeviceSupported,
// OnDeviceOnly, DictationAvailable) an absent key and false are
// indistinguishable to clients. Models has no omitempty and is therefore
// always emitted (as null when the slice is nil).
//
// NOTE(review): the Permission* fields presumably reflect what was recorded
// through Service.SetASRPermissionDenied — confirm against the service code.
type ASRStatus struct {
	Ready              bool             `json:"ready"`
	Provider           string           `json:"provider"`
	ModelName          string           `json:"model_name,omitempty"`
	StreamingSupported bool             `json:"streaming_supported"`
	EditBeforeSend     bool             `json:"edit_before_send"`
	Downloading        bool             `json:"downloading"`
	Progress           *Progress        `json:"progress,omitempty"`
	Downloads          []DownloadStatus `json:"downloads,omitempty"`
	HasPending         bool             `json:"has_pending"`
	PermissionDenied   bool             `json:"permission_denied,omitempty"`
	PermissionError    string           `json:"permission_error,omitempty"`
	PermissionAppName  string           `json:"permission_app_name,omitempty"`
	OnDeviceSupported  bool             `json:"on_device_supported,omitempty"`
	OnDeviceOnly       bool             `json:"on_device_only,omitempty"`
	DictationAvailable bool             `json:"dictation_available,omitempty"`
	OfflineLanguages   []string         `json:"offline_languages,omitempty"`
	Models             []interface{}    `json:"models"`
	AvailableProviders []string         `json:"available_providers,omitempty"`
}
ASRStatus represents ASR status.
type ComponentDownloadStatus ¶
// ComponentDownloadStatus describes the download/readiness state of one TTS
// component; TTSStatus.Components holds these keyed by string.
//
// Only Ready, Downloading and Progress are always serialized; every other
// field is dropped from the JSON when empty/zero. Note that DownloadedSize is
// serialized under the key "downloaded_human", not "downloaded_size".
//
// NOTE(review): the scale of Progress (0-1 fraction vs. 0-100 percent) is not
// visible here — confirm with the code that fills it in.
type ComponentDownloadStatus struct {
	Ready          bool    `json:"ready"`
	Downloading    bool    `json:"downloading"`
	Progress       float64 `json:"progress"`
	Error          string  `json:"error,omitempty"`
	InitStage      string  `json:"init_stage,omitempty"`
	Speed          string  `json:"speed,omitempty"`
	ETA            string  `json:"eta,omitempty"`
	File           string  `json:"file,omitempty"`
	FileIndex      int     `json:"file_index,omitempty"`
	TotalFiles     int     `json:"total_files,omitempty"`
	DownloadedSize string  `json:"downloaded_human,omitempty"`
}
ComponentDownloadStatus represents the download/readiness status of a TTS component (e.g. Kokoro model, vocoder).
type Config ¶
// Config holds unified speech configuration: the TTS settings under the
// "tts" key and the ASR settings under the "asr" key, in both JSON and YAML.
//
// Each tag carries exactly one json and one yaml entry: a repeated key is
// redundant because reflect.StructTag.Get only ever returns the first match.
type Config struct {
	TTS TTSConfig `json:"tts" yaml:"tts"`
	ASR ASRConfig `json:"asr" yaml:"asr"`
}
Config holds unified speech configuration.
type DownloadRequest ¶
// DownloadRequest is the JSON payload of a model download request; the model
// to download is named by the "model_type" key.
type DownloadRequest struct {
	ModelType string `json:"model_type"`
}
DownloadRequest represents a model download request.
type DownloadStatus ¶
// DownloadStatus pairs a model type with its download Progress. Progress is
// embedded by value, so the "progress" key is always present in the JSON.
type DownloadStatus struct {
	ModelType string   `json:"model_type"`
	Progress  Progress `json:"progress"`
}
DownloadStatus represents a single model download status.
type EspeakLanguagePack ¶
// EspeakLanguagePack describes one eSpeak-NG language dictionary entry, as
// returned by EspeakManager.ListLanguagePacks.
type EspeakLanguagePack struct {
	Code       string `json:"code"`
	Name       string `json:"name"`
	Downloaded bool   `json:"downloaded"`
	Size       int64  `json:"size"` // actual file size in bytes
}
EspeakLanguagePack represents an eSpeak-NG language dictionary.
type EspeakManager ¶
type EspeakManager struct {
// contains filtered or unexported fields
}
EspeakManager checks the real espeak-ng-data directory for available language dictionaries. The eSpeak-NG engine is statically linked via CGO, so there is no library to download — only the data directory needs to be present at runtime.
func NewEspeakManager ¶
func NewEspeakManager(dataDir string) *EspeakManager
NewEspeakManager creates a new EspeakManager that detects the real espeak-ng-data directory using the same search logic as the CGO provider.
func (*EspeakManager) GetDataSize ¶
func (em *EspeakManager) GetDataSize() int64
GetDataSize returns the total size of the espeak-ng-data directory in bytes.
func (*EspeakManager) GetLibraryPath ¶
func (em *EspeakManager) GetLibraryPath() string
GetLibraryPath returns the detected espeak-ng-data path.
func (*EspeakManager) GetPlatformInfo ¶
func (em *EspeakManager) GetPlatformInfo() (string, string)
GetPlatformInfo returns OS/arch info (useful for status display).
func (*EspeakManager) IsLibraryInstalled ¶
func (em *EspeakManager) IsLibraryInstalled() bool
IsLibraryInstalled returns true if the espeak-ng-data directory was found. The engine itself is statically linked; this checks for runtime data.
func (*EspeakManager) LanguageCount ¶
func (em *EspeakManager) LanguageCount() int
LanguageCount returns the number of *_dict files found.
func (*EspeakManager) ListLanguagePacks ¶
func (em *EspeakManager) ListLanguagePacks() []*EspeakLanguagePack
ListLanguagePacks scans the espeak-ng-data directory for *_dict files and returns the list with real file sizes.
func (*EspeakManager) Refresh ¶
func (em *EspeakManager) Refresh()
Refresh re-scans for the espeak-ng-data directory.
type EspeakStatus ¶
// EspeakStatus is the eSpeak-NG runtime data status; StatusResponse exposes
// it under the optional "espeak" JSON key. Path is omitted from the JSON when
// empty.
//
// NOTE(review): the fields presumably mirror EspeakManager.IsLibraryInstalled,
// GetLibraryPath, LanguageCount and GetDataSize — confirm where it is built.
type EspeakStatus struct {
	Installed     bool   `json:"installed"`
	Path          string `json:"path,omitempty"`
	LanguageCount int    `json:"language_count"`
	DataSize      int64  `json:"data_size"`
	StaticLinked  bool   `json:"static_linked"`
}
EspeakStatus represents the eSpeak-NG runtime data status.
type Handler ¶
type Handler struct {
// contains filtered or unexported fields
}
Handler handles unified speech HTTP requests.
func NewHandler ¶
NewHandler creates a new speech handler.
func (*Handler) CancelASRDownload ¶
CancelASRDownload cancels the current ASR model download.
func (*Handler) CancelKokoroDownload ¶
CancelKokoroDownload cancels the Kokoro download.
func (*Handler) CancelVocoderDownload ¶
CancelVocoderDownload cancels the vocoder download.
func (*Handler) DownloadASRModel ¶
DownloadASRModel starts downloading an ASR model.
func (*Handler) DownloadKokoro ¶
DownloadKokoro starts downloading the Kokoro model.
func (*Handler) DownloadVocoder ¶
DownloadVocoder starts downloading the vocoder model.
func (*Handler) GetEspeakManager ¶
func (h *Handler) GetEspeakManager() *EspeakManager
GetEspeakManager returns the EspeakManager instance.
func (*Handler) GetOfflineLanguages ¶
GetOfflineLanguages returns installed offline dictation languages (macOS/Windows native). Separated from /status because it reads system settings which is non-critical info.
func (*Handler) GetPersistedTTSProvider ¶
GetPersistedTTSProvider returns the persisted TTS provider from kvstore, or empty string if not set.
func (*Handler) GetTTSConfig ¶
GetTTSConfig returns the current TTS configuration.
func (*Handler) ListEspeakLanguages ¶
ListEspeakLanguages returns available eSpeak-NG language dictionaries by scanning the real espeak-ng-data directory.
func (*Handler) RegisterRoutes ¶
RegisterRoutes registers unified speech management routes.
func (*Handler) RestoreEditBeforeSend ¶
func (h *Handler) RestoreEditBeforeSend()
RestoreEditBeforeSend restores the persisted edit-before-send setting from kvstore.
func (*Handler) SetASROnDevice ¶
SetASROnDevice toggles on-device-only mode for macOS native STT.
func (*Handler) SetEditBeforeSend ¶
SetEditBeforeSend toggles the edit-before-send setting for ASR.
func (*Handler) SetTTSConfig ¶
SetTTSConfig updates the TTS configuration.
func (*Handler) SwitchASRModel ¶
SwitchASRModel switches to a different ASR model or provider.
func (*Handler) SwitchTTSProvider ¶
SwitchTTSProvider switches the TTS provider.
type InitConfig ¶
InitConfig holds configuration for lazy initialization of TTS/STT services.
type MacOSNativeSTT ¶
// MacOSNativeSTT is the stand-in provider type for non-macOS builds. It is an
// empty struct and therefore carries no state.
type MacOSNativeSTT struct{}
MacOSNativeSTT is a stub for non-macOS systems.
func NewMacOSNativeSTT ¶
func NewMacOSNativeSTT() *MacOSNativeSTT
NewMacOSNativeSTT creates a stub provider
func (*MacOSNativeSTT) Available ¶
func (p *MacOSNativeSTT) Available() bool
Available returns false on non-macOS
func (*MacOSNativeSTT) DictationAvailable ¶
func (p *MacOSNativeSTT) DictationAvailable() bool
func (*MacOSNativeSTT) Initialize ¶
func (p *MacOSNativeSTT) Initialize() error
Initialize returns error on non-macOS
func (*MacOSNativeSTT) MaxDuration ¶
func (p *MacOSNativeSTT) MaxDuration() time.Duration
MaxDuration returns zero
func (*MacOSNativeSTT) OfflineDictationLanguages ¶
func (p *MacOSNativeSTT) OfflineDictationLanguages() []string
func (*MacOSNativeSTT) RequireOnDevice ¶
func (p *MacOSNativeSTT) RequireOnDevice() bool
func (*MacOSNativeSTT) SetRequireOnDevice ¶
func (p *MacOSNativeSTT) SetRequireOnDevice(_ bool)
func (*MacOSNativeSTT) SupportedFormats ¶
func (p *MacOSNativeSTT) SupportedFormats() []stt.AudioFormat
SupportedFormats returns empty list
func (*MacOSNativeSTT) SupportsOnDevice ¶
func (p *MacOSNativeSTT) SupportsOnDevice() bool
func (*MacOSNativeSTT) Transcribe ¶
func (p *MacOSNativeSTT) Transcribe(_ context.Context, _ *stt.TranscribeRequest) (*stt.TranscribeResponse, error)
Transcribe returns error
func (*MacOSNativeSTT) TranscribeStream ¶
func (p *MacOSNativeSTT) TranscribeStream(_ context.Context, _ *stt.TranscribeRequest, _ stt.StreamCallback) error
TranscribeStream returns error
func (*MacOSNativeSTT) Type ¶
func (p *MacOSNativeSTT) Type() stt.ProviderType
Type returns the provider type
type ModelInfo ¶
// ModelInfo describes a single speech model.
//
// Languages and Streaming are omitted from the JSON when empty/false; all
// other keys are always present. Size is a string, not a byte count.
type ModelInfo struct {
	ID          string   `json:"id"`
	Name        string   `json:"name"`
	Description string   `json:"description"`
	Type        string   `json:"type"` // "tts" or "asr"
	Languages   []string `json:"languages,omitempty"`
	Size        string   `json:"size"`
	Streaming   bool     `json:"streaming,omitempty"`
	Downloaded  bool     `json:"downloaded"`
}
ModelInfo represents information about a speech model.
type ModelsResponse ¶
ModelsResponse represents the list of available models.
type OnDeviceUnavailableError ¶
type OnDeviceUnavailableError struct {
}
OnDeviceUnavailableError indicates on-device recognition failed for the locale.
func (*OnDeviceUnavailableError) Error ¶
func (e *OnDeviceUnavailableError) Error() string
type Progress ¶
// Progress is a download progress snapshot. No field uses omitempty, so all
// six keys are always present in the JSON.
//
// NOTE(review): Downloaded and Total are presumably byte counts and
// Percentage presumably ranges over 0-100 — confirm with the downloader.
type Progress struct {
	File       string  `json:"file"`
	Downloaded int64   `json:"downloaded"`
	Total      int64   `json:"total"`
	Percentage float64 `json:"percentage"`
	SpeedHuman string  `json:"speed_human"`
	ETA        string  `json:"eta"`
}
Progress represents download progress.
type Service ¶
// Service is the unified speech API covering both TTS and ASR. It groups
// status and model queries, accessors and setters for the active providers
// and underlying stt/tts services, lazy initialization, and the
// edit-before-send flag. NewServiceWithInitConfig returns an implementation.
type Service interface {
	// GetStatus returns the unified speech status.
	GetStatus() *StatusResponse
	// GetModels returns all available models.
	GetModels() *ModelsResponse
	// GetASRProvider returns the ASR provider if available.
	GetASRProvider() stt.Provider
	// GetTTSProvider returns the TTS provider if available.
	GetTTSProvider() tts.Provider
	// GetSTTService returns the underlying STT service.
	GetSTTService() stt.Service
	// GetTTSService returns the underlying TTS service.
	GetTTSService() tts.Service
	// IsEditBeforeSendEnabled returns whether edit-before-send is enabled.
	IsEditBeforeSendEnabled() bool
	// SetEditBeforeSend enables or disables edit-before-send.
	SetEditBeforeSend(enabled bool)
	// Initialize initializes TTS/STT services lazily.
	Initialize() error
	// IsInitialized returns whether services have been initialized.
	IsInitialized() bool
	// SetTTSProvider sets the TTS provider.
	SetTTSProvider(provider tts.Provider)
	// SetASRProvider sets the ASR provider.
	SetASRProvider(provider stt.Provider)
	// SetASRPermissionDenied records that ASR permission was denied (e.g. macOS TCC).
	SetASRPermissionDenied(errMsg string)
	// SetEspeakManager sets the eSpeak manager for status reporting.
	SetEspeakManager(em *EspeakManager)
	// SetStatusPrewarmEnabled controls whether GetStatus may trigger lazy STT initialization.
	SetStatusPrewarmEnabled(enabled bool)
}
Service provides unified speech functionality (TTS + ASR).
func NewService ¶
NewService creates a new unified speech service.
func NewServiceWithInitConfig ¶
func NewServiceWithInitConfig(cfg *Config, initCfg *InitConfig) Service
NewServiceWithInitConfig creates a service with lazy initialization config.
type SpeechError ¶
SpeechError represents a speech-related error.
func (*SpeechError) Error ¶
func (e *SpeechError) Error() string
type StatusResponse ¶
// StatusResponse is the unified speech status payload returned by
// Service.GetStatus. TTS and ASR are always serialized; Espeak is a pointer
// with omitempty, so the "espeak" key is absent when it is nil.
type StatusResponse struct {
	TTS    TTSStatus     `json:"tts"`
	ASR    ASRStatus     `json:"asr"`
	Espeak *EspeakStatus `json:"espeak,omitempty"`
}
StatusResponse represents the unified speech status.
type SwitchProviderRequest ¶
// SwitchProviderRequest is the JSON payload of a TTS provider switch request;
// the target provider is named by the "provider" key.
type SwitchProviderRequest struct {
	Provider string `json:"provider"`
}
SwitchProviderRequest represents a TTS provider switch request.
type SwitchRequest ¶
// SwitchRequest is the JSON payload of a model switch request; the target is
// named by the "model_type" key (the same key DownloadRequest uses).
type SwitchRequest struct {
	ModelType string `json:"model_type"`
}
SwitchRequest represents a model switch request.
type TTSConfig ¶
// TTSConfig holds TTS-specific configuration.
//
// Each field is serialized under the same key in JSON and YAML. Every tag
// carries exactly one json and one yaml entry: a repeated key is redundant
// because reflect.StructTag.Get only ever returns the first match.
//
// The ranges and defaults noted on Speed, Pitch and Volume are documentation
// only; nothing in this declaration enforces them.
type TTSConfig struct {
	Provider string  `json:"provider" yaml:"provider"`
	Model    string  `json:"model" yaml:"model"`
	Speed    float32 `json:"speed" yaml:"speed"`   // Speech rate (0.5-2.0, default 1.0)
	Pitch    float32 `json:"pitch" yaml:"pitch"`   // Pitch adjustment (-10 to 10, default 0)
	Volume   float32 `json:"volume" yaml:"volume"` // Volume (0-100, default 100)
}
TTSConfig holds TTS-specific configuration.
type TTSConfigRequest ¶
// TTSConfigRequest is the JSON payload of a TTS config update request. It
// carries the same three tunables as TTSConfig (speed, pitch, volume) but not
// the provider or model.
//
// NOTE(review): no field is a pointer or uses omitempty, so a key missing
// from the request decodes to 0 and cannot be told apart from an explicit 0 —
// confirm how the handler treats zero values.
type TTSConfigRequest struct {
	Speed  float32 `json:"speed"`
	Pitch  float32 `json:"pitch"`
	Volume float32 `json:"volume"`
}
TTSConfigRequest represents a TTS config update request.
type TTSStatus ¶
// TTSStatus is the TTS portion of the speech status payload; StatusResponse
// exposes it under the "tts" JSON key.
//
// ModelName, AvailableProviders and Components are omitted from the JSON when
// empty. Models has no omitempty and is therefore always emitted (as null
// when the slice is nil). Components holds per-component download state
// keyed by string.
type TTSStatus struct {
	Ready              bool                                `json:"ready"`
	Provider           string                              `json:"provider"`
	ModelName          string                              `json:"model_name,omitempty"`
	AvailableProviders []string                            `json:"available_providers,omitempty"`
	Models             []interface{}                       `json:"models"`
	Components         map[string]*ComponentDownloadStatus `json:"components,omitempty"`
}
TTSStatus represents TTS status.
type TranscriptionResult ¶
// TranscriptionResult is a transcription result with edit support.
//
// Text and Editable are always serialized; Language, Duration, Confidence and
// SessionID are omitted from the JSON when empty/zero.
//
// NOTE(review): Editable presumably mirrors the edit-before-send setting, and
// the unit of Duration is not visible here — confirm with the Transcribe
// handler.
type TranscriptionResult struct {
	Text       string  `json:"text"`
	Language   string  `json:"language,omitempty"`
	Duration   float64 `json:"duration,omitempty"`
	Confidence float64 `json:"confidence,omitempty"`
	Editable   bool    `json:"editable"`
	SessionID  string  `json:"session_id,omitempty"`
}
TranscriptionResult represents a transcription result with edit support.