Documentation
¶
Overview ¶
Package tts provides text-to-speech functionality. This package implements TTS provider interfaces for synthesizing speech from text.
Index ¶
- Variables
- func DetectLanguage(text string) (lang string, ratio float64)
- func KokoroAvailable() bool
- func NewKokoroProvider(_ string) *kokoroStub
- func WindowsNativeAvailable() bool
- type AudioFormat
- type AudioPreprocessor
- type Consent
- type ConsentManager
- func (cm *ConsentManager) GetConsent(ctx context.Context, userID, service string) (*Consent, error)
- func (cm *ConsentManager) HasConsent(ctx context.Context, userID, service string) (bool, error)
- func (cm *ConsentManager) SetConsent(ctx context.Context, userID, service string, given bool, version string) error
- type EdgeTTSProvider
- func (p *EdgeTTSProvider) IsAvailable() bool
- func (p *EdgeTTSProvider) ListVoices(ctx context.Context) ([]Voice, error)
- func (p *EdgeTTSProvider) MaxTextLength() int
- func (p *EdgeTTSProvider) Name() string
- func (p *EdgeTTSProvider) SupportedFormats() []AudioFormat
- func (p *EdgeTTSProvider) Synthesize(ctx context.Context, req *SynthesizeRequest) (*SynthesizeResponse, error)
- func (p *EdgeTTSProvider) SynthesizeStream(ctx context.Context, req *SynthesizeRequest, callback StreamCallback) error
- func (p *EdgeTTSProvider) Type() ProviderType
- type EspeakModelManager
- type EspeakNGAdapter
- func (a *EspeakNGAdapter) Available() bool
- func (a *EspeakNGAdapter) Close()
- func (a *EspeakNGAdapter) GetProvider() *EspeakNGProvider
- func (a *EspeakNGAdapter) ListVoices(_ context.Context) ([]Voice, error)
- func (a *EspeakNGAdapter) MaxTextLength() int
- func (a *EspeakNGAdapter) Name() string
- func (a *EspeakNGAdapter) SupportedFormats() []AudioFormat
- func (a *EspeakNGAdapter) Synthesize(_ context.Context, _ *SynthesizeRequest) (*SynthesizeResponse, error)
- func (a *EspeakNGAdapter) SynthesizeStream(_ context.Context, _ *SynthesizeRequest, _ StreamCallback) error
- func (a *EspeakNGAdapter) Type() ProviderType
- type EspeakNGProvider
- func (p *EspeakNGProvider) Close()
- func (p *EspeakNGProvider) Initialize() error
- func (p *EspeakNGProvider) Name() string
- func (p *EspeakNGProvider) SupportedLanguages() []string
- func (p *EspeakNGProvider) Synthesize(_ context.Context, _ *EspeakNGRequest) (io.ReadCloser, error)
- func (p *EspeakNGProvider) TextToPhonemes(_, _ string) (string, error)
- func (p *EspeakNGProvider) Type() string
- type EspeakNGRequest
- type Handler
- type KokoroModelManager
- func (m *KokoroModelManager) CancelDownload() error
- func (m *KokoroModelManager) DownloadModel(ctx context.Context) error
- func (m *KokoroModelManager) GetDownloadProgress() (float64, string, error)
- func (m *KokoroModelManager) GetModelPath() string
- func (m *KokoroModelManager) GetModelStatus() map[string]interface{}
- func (m *KokoroModelManager) IsModelReady() bool
- type LanguageStats
- type LocalSpeaker
- type MacOSNativeTTS
- func (p *MacOSNativeTTS) Available() bool
- func (p *MacOSNativeTTS) Close()
- func (p *MacOSNativeTTS) Initialize() error
- func (p *MacOSNativeTTS) ListVoices(ctx context.Context) ([]Voice, error)
- func (p *MacOSNativeTTS) MaxTextLength() int
- func (p *MacOSNativeTTS) Name() string
- func (p *MacOSNativeTTS) SupportedFormats() []AudioFormat
- func (p *MacOSNativeTTS) Synthesize(ctx context.Context, req *SynthesizeRequest) (*SynthesizeResponse, error)
- func (p *MacOSNativeTTS) SynthesizeStream(ctx context.Context, req *SynthesizeRequest, callback StreamCallback) error
- func (p *MacOSNativeTTS) Type() ProviderType
- type Phonemizer
- type Provider
- type ProviderConfig
- type ProviderFactory
- type ProviderType
- type Service
- type ServiceConfig
- type StreamCallback
- type SynthesizeRequest
- type SynthesizeResponse
- type VocoderModelManager
- type Voice
Constants ¶
This section is empty.
Variables ¶
var (
	// ErrProviderNotFound is returned when a provider is not found.
	ErrProviderNotFound = errors.New("TTS provider not found")
	// ErrProviderDisabled is returned when a provider is disabled.
	ErrProviderDisabled = errors.New("TTS provider disabled")
	// ErrInvalidText is returned when the text is invalid.
	ErrInvalidText = errors.New("invalid text")
	// ErrSynthesisFailed is returned when synthesis fails.
	ErrSynthesisFailed = errors.New("synthesis failed")
	// ErrTextTooLong is returned when the text is too long.
	ErrTextTooLong = errors.New("text too long")
	// ErrVoiceNotFound is returned when the voice is not found.
	ErrVoiceNotFound = errors.New("voice not found")
	// ErrNoProviderConfigured is returned when no provider is configured.
	ErrNoProviderConfigured = errors.New("no TTS provider configured, please configure one in settings")
)
Functions ¶
func DetectLanguage ¶
DetectLanguage detects the primary language of text based on Unicode ranges. Returns the detected language code and confidence ratio (0.0-1.0).
func KokoroAvailable ¶
func KokoroAvailable() bool
KokoroAvailable reports whether Kokoro was compiled in.
func NewKokoroProvider ¶
func NewKokoroProvider(_ string) *kokoroStub
func WindowsNativeAvailable ¶
func WindowsNativeAvailable() bool
WindowsNativeAvailable checks if Windows native TTS is available
Types ¶
type AudioFormat ¶
type AudioFormat string
AudioFormat represents the output audio format.
const (
	// FormatMP3 is MP3 audio format.
	FormatMP3 AudioFormat = "mp3"
	// FormatOPUS is OPUS audio format.
	FormatOPUS AudioFormat = "opus"
	// FormatAAC is AAC audio format.
	FormatAAC AudioFormat = "aac"
	// FormatFLAC is FLAC audio format.
	FormatFLAC AudioFormat = "flac"
	// FormatWAV is WAV audio format.
	FormatWAV AudioFormat = "wav"
	// FormatPCM is raw PCM audio format.
	FormatPCM AudioFormat = "pcm"
)
type AudioPreprocessor ¶
type AudioPreprocessor struct {
// contains filtered or unexported fields
}
AudioPreprocessor handles audio preprocessing before the vocoder.
func NewAudioPreprocessor ¶
func NewAudioPreprocessor(sampleRate int) *AudioPreprocessor
NewAudioPreprocessor creates a new audio preprocessor
func (*AudioPreprocessor) ApplyEQ ¶
func (p *AudioPreprocessor) ApplyEQ(samples []int16) []int16
ApplyEQ applies an EQ filter that boosts 1 kHz by 3 dB.
func (*AudioPreprocessor) ApplyLowpass ¶
func (p *AudioPreprocessor) ApplyLowpass(samples []int16) []int16
ApplyLowpass applies a lowpass filter at 7 kHz.
func (*AudioPreprocessor) Preprocess ¶
func (p *AudioPreprocessor) Preprocess(samples []int16) []int16
Preprocess applies the full preprocessing chain.
type Consent ¶
type Consent struct {
ID string `json:"id"`
UserID string `json:"user_id"`
Service string `json:"service"` // 'edge-tts', 'espeak'
ConsentGiven bool `json:"consent_given"`
ConsentDate *time.Time `json:"consent_date,omitempty"`
ConsentVersion string `json:"consent_version"`
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
}
Consent represents a user's consent for a service
type ConsentManager ¶
type ConsentManager struct {
// contains filtered or unexported fields
}
ConsentManager handles user privacy consent for online TTS services
func NewConsentManager ¶
func NewConsentManager(db *sql.DB) *ConsentManager
NewConsentManager creates a new consent manager
func NewConsentManagerWithReadDB ¶
func NewConsentManagerWithReadDB(writeDB, readDB *sql.DB) *ConsentManager
NewConsentManagerWithReadDB creates a new consent manager with separate write and read database handles.
func (*ConsentManager) GetConsent ¶
GetConsent retrieves user consent for a service
func (*ConsentManager) HasConsent ¶
HasConsent checks if user has given consent
func (*ConsentManager) SetConsent ¶
func (cm *ConsentManager) SetConsent(ctx context.Context, userID, service string, given bool, version string) error
SetConsent saves or updates user consent
type EdgeTTSProvider ¶
type EdgeTTSProvider struct {
// contains filtered or unexported fields
}
EdgeTTSProvider implements the Provider interface for Microsoft Edge TTS.
func NewEdgeTTSProvider ¶
func NewEdgeTTSProvider() *EdgeTTSProvider
NewEdgeTTSProvider creates a new Edge TTS provider.
func (*EdgeTTSProvider) IsAvailable ¶
func (p *EdgeTTSProvider) IsAvailable() bool
IsAvailable always returns true for Edge TTS.
func (*EdgeTTSProvider) ListVoices ¶
func (p *EdgeTTSProvider) ListVoices(ctx context.Context) ([]Voice, error)
ListVoices returns available voices.
func (*EdgeTTSProvider) MaxTextLength ¶
func (p *EdgeTTSProvider) MaxTextLength() int
MaxTextLength returns the maximum text length.
func (*EdgeTTSProvider) Name ¶
func (p *EdgeTTSProvider) Name() string
Name returns the provider name.
func (*EdgeTTSProvider) SupportedFormats ¶
func (p *EdgeTTSProvider) SupportedFormats() []AudioFormat
SupportedFormats returns the supported audio formats.
func (*EdgeTTSProvider) Synthesize ¶
func (p *EdgeTTSProvider) Synthesize(ctx context.Context, req *SynthesizeRequest) (*SynthesizeResponse, error)
Synthesize synthesizes text to speech using edge-tts-go library.
func (*EdgeTTSProvider) SynthesizeStream ¶
func (p *EdgeTTSProvider) SynthesizeStream(ctx context.Context, req *SynthesizeRequest, callback StreamCallback) error
SynthesizeStream synthesizes text with streaming audio output. The complete audio is delivered in a single callback invocation because the SSE frontend closes the EventSource after the first audio event.
func (*EdgeTTSProvider) Type ¶
func (p *EdgeTTSProvider) Type() ProviderType
Type returns the provider type.
type EspeakModelManager ¶
type EspeakModelManager struct{}
func NewEspeakModelManager ¶
func NewEspeakModelManager(dataPath string) *EspeakModelManager
func (*EspeakModelManager) EnsureVoiceData ¶
func (m *EspeakModelManager) EnsureVoiceData(ctx context.Context) error
type EspeakNGAdapter ¶
type EspeakNGAdapter struct{}
EspeakNGAdapter is a stub used when the package is built without the espeak build tag.
func NewEspeakNGAdapter ¶
func NewEspeakNGAdapter(_ string) *EspeakNGAdapter
NewEspeakNGAdapter returns a stub adapter.
func (*EspeakNGAdapter) Available ¶
func (a *EspeakNGAdapter) Available() bool
func (*EspeakNGAdapter) Close ¶
func (a *EspeakNGAdapter) Close()
func (*EspeakNGAdapter) GetProvider ¶
func (a *EspeakNGAdapter) GetProvider() *EspeakNGProvider
func (*EspeakNGAdapter) ListVoices ¶
func (a *EspeakNGAdapter) ListVoices(_ context.Context) ([]Voice, error)
func (*EspeakNGAdapter) MaxTextLength ¶
func (a *EspeakNGAdapter) MaxTextLength() int
func (*EspeakNGAdapter) Name ¶
func (a *EspeakNGAdapter) Name() string
func (*EspeakNGAdapter) SupportedFormats ¶
func (a *EspeakNGAdapter) SupportedFormats() []AudioFormat
func (*EspeakNGAdapter) Synthesize ¶
func (a *EspeakNGAdapter) Synthesize(_ context.Context, _ *SynthesizeRequest) (*SynthesizeResponse, error)
func (*EspeakNGAdapter) SynthesizeStream ¶
func (a *EspeakNGAdapter) SynthesizeStream(_ context.Context, _ *SynthesizeRequest, _ StreamCallback) error
func (*EspeakNGAdapter) Type ¶
func (a *EspeakNGAdapter) Type() ProviderType
type EspeakNGProvider ¶
type EspeakNGProvider struct{}
EspeakNGProvider is a stub used when the package is built without the espeak build tag.
func NewEspeakNGProvider ¶
func NewEspeakNGProvider(_ string) *EspeakNGProvider
NewEspeakNGProvider returns a stub provider.
func (*EspeakNGProvider) Close ¶
func (p *EspeakNGProvider) Close()
func (*EspeakNGProvider) Initialize ¶
func (p *EspeakNGProvider) Initialize() error
func (*EspeakNGProvider) Name ¶
func (p *EspeakNGProvider) Name() string
func (*EspeakNGProvider) SupportedLanguages ¶
func (p *EspeakNGProvider) SupportedLanguages() []string
func (*EspeakNGProvider) Synthesize ¶
func (p *EspeakNGProvider) Synthesize(_ context.Context, _ *EspeakNGRequest) (io.ReadCloser, error)
func (*EspeakNGProvider) TextToPhonemes ¶
func (p *EspeakNGProvider) TextToPhonemes(_, _ string) (string, error)
func (*EspeakNGProvider) Type ¶
func (p *EspeakNGProvider) Type() string
type EspeakNGRequest ¶
type EspeakNGRequest struct {
Text string `json:"text"`
Language string `json:"language"`
Rate float32 `json:"rate"`
Pitch float32 `json:"pitch"`
Volume float32 `json:"volume"`
}
EspeakNGRequest represents a synthesis request (stub).
type Handler ¶
type Handler struct {
// contains filtered or unexported fields
}
Handler handles TTS management HTTP requests.
func (*Handler) ListProviders ¶
ListProviders returns available TTS providers.
func (*Handler) ListVoices ¶
ListVoices returns available TTS voices.
func (*Handler) RegisterRoutes ¶
RegisterRoutes registers the TTS management routes.
type KokoroModelManager ¶
type KokoroModelManager struct{}
func NewKokoroModelManager ¶
func NewKokoroModelManager(dataPath string) *KokoroModelManager
func (*KokoroModelManager) CancelDownload ¶
func (m *KokoroModelManager) CancelDownload() error
func (*KokoroModelManager) DownloadModel ¶
func (m *KokoroModelManager) DownloadModel(ctx context.Context) error
func (*KokoroModelManager) GetDownloadProgress ¶
func (m *KokoroModelManager) GetDownloadProgress() (float64, string, error)
func (*KokoroModelManager) GetModelPath ¶
func (m *KokoroModelManager) GetModelPath() string
func (*KokoroModelManager) GetModelStatus ¶
func (m *KokoroModelManager) GetModelStatus() map[string]interface{}
func (*KokoroModelManager) IsModelReady ¶
func (m *KokoroModelManager) IsModelReady() bool
type LanguageStats ¶
type LanguageStats struct {
CJKHan int // CJK Unified Ideographs (shared: zh, ja, ko)
Kana int // Hiragana + Katakana (unique to Japanese)
Hangul int // Korean Hangul (unique to Korean)
Latin int // Latin script (en, fr, de, es, pt, etc.)
Cyrillic int // Cyrillic script (ru, uk, bg, etc.)
Arabic int // Arabic script (ar, fa, ur, etc.)
Thai int // Thai script
Devanag int // Devanagari script (hi, mr, ne, etc.)
Other int // Other letters/numbers
Total int // Total scored characters
}
LanguageStats holds language detection statistics. CJKHan counts are shared across Chinese/Japanese/Korean since Han ideographs are used by all three languages. Disambiguating scripts (Kana, Hangul) break ties.
func AnalyzeText ¶
func AnalyzeText(text string) *LanguageStats
AnalyzeText analyzes text and returns language statistics.
func (*LanguageStats) GetLanguageRatios ¶
func (s *LanguageStats) GetLanguageRatios() map[string]float64
GetLanguageRatios returns all language ratios.
func (*LanguageStats) IsMixedLanguage ¶
func (s *LanguageStats) IsMixedLanguage() bool
IsMixedLanguage returns true if text contains multiple language scripts.
func (*LanguageStats) PrimaryLanguage ¶
func (s *LanguageStats) PrimaryLanguage() (lang string, ratio float64)
PrimaryLanguage returns the primary language and its confidence ratio. Han ideographs are attributed to a CJK language according to which disambiguating scripts also appear in the text:
- If only Kana is present → Han counts toward Japanese
- If only Hangul is present → Han counts toward Korean
- If neither is present → Han counts toward Chinese
- If both Kana and Hangul are present → Han is split proportionally
type LocalSpeaker ¶
type LocalSpeaker interface {
SpeakLocally(ctx context.Context, text string, speed float32) error
StopSpeaking()
}
LocalSpeaker is an optional interface for providers that can play audio locally.
type MacOSNativeTTS ¶
type MacOSNativeTTS struct{}
MacOSNativeTTS is a stub for non-macOS systems.
func NewMacOSNativeTTS ¶
func NewMacOSNativeTTS() *MacOSNativeTTS
NewMacOSNativeTTS creates a stub provider
func (*MacOSNativeTTS) Available ¶
func (p *MacOSNativeTTS) Available() bool
Available returns false on non-macOS systems
func (*MacOSNativeTTS) Initialize ¶
func (p *MacOSNativeTTS) Initialize() error
Initialize returns an error on non-macOS systems.
func (*MacOSNativeTTS) ListVoices ¶
func (p *MacOSNativeTTS) ListVoices(ctx context.Context) ([]Voice, error)
ListVoices returns an empty list.
func (*MacOSNativeTTS) MaxTextLength ¶
func (p *MacOSNativeTTS) MaxTextLength() int
MaxTextLength returns 0
func (*MacOSNativeTTS) SupportedFormats ¶
func (p *MacOSNativeTTS) SupportedFormats() []AudioFormat
SupportedFormats returns an empty list.
func (*MacOSNativeTTS) Synthesize ¶
func (p *MacOSNativeTTS) Synthesize(ctx context.Context, req *SynthesizeRequest) (*SynthesizeResponse, error)
Synthesize returns an error.
func (*MacOSNativeTTS) SynthesizeStream ¶
func (p *MacOSNativeTTS) SynthesizeStream(ctx context.Context, req *SynthesizeRequest, callback StreamCallback) error
SynthesizeStream returns an error.
func (*MacOSNativeTTS) Type ¶
func (p *MacOSNativeTTS) Type() ProviderType
Type returns provider type
type Phonemizer ¶
type Phonemizer interface {
// TextToPhonemes converts text to IPA phonemes.
// lang is a language code (e.g. "en", "cmn", "ja").
TextToPhonemes(text, lang string) (string, error)
}
Phonemizer converts text to IPA phoneme strings.
type Provider ¶
type Provider interface {
// Name returns the provider name.
Name() string
// Type returns the provider type.
Type() ProviderType
// Synthesize synthesizes text to speech.
Synthesize(ctx context.Context, req *SynthesizeRequest) (*SynthesizeResponse, error)
// SynthesizeStream synthesizes text with streaming audio output.
SynthesizeStream(ctx context.Context, req *SynthesizeRequest, callback StreamCallback) error
// ListVoices returns available voices.
ListVoices(ctx context.Context) ([]Voice, error)
// SupportedFormats returns the supported audio formats.
SupportedFormats() []AudioFormat
// MaxTextLength returns the maximum text length.
MaxTextLength() int
}
Provider defines the TTS provider interface.
func NewWindowsNativeTTSProvider ¶
func NewWindowsNativeTTSProvider() Provider
NewWindowsNativeTTSProvider returns nil on non-Windows platforms.
type ProviderConfig ¶
type ProviderConfig struct {
// Type is the provider type.
Type ProviderType `json:"type" yaml:"type"`
// Enabled indicates if this provider is enabled.
Enabled bool `json:"enabled" yaml:"enabled"`
// APIKey is the API key for the provider.
APIKey string `json:"api_key,omitempty" yaml:"api_key,omitempty"`
// BaseURL is the base URL for the API.
BaseURL string `json:"base_url,omitempty" yaml:"base_url,omitempty"`
// DefaultVoice is the default voice ID.
DefaultVoice string `json:"default_voice,omitempty" yaml:"default_voice,omitempty"`
// DefaultFormat is the default audio format.
DefaultFormat AudioFormat `json:"default_format,omitempty" yaml:"default_format,omitempty"`
// MaxTextLength is the maximum text length.
MaxTextLength int `json:"max_text_length,omitempty" yaml:"max_text_length,omitempty"`
}
ProviderConfig holds the configuration for a TTS provider.
type ProviderFactory ¶
type ProviderFactory struct{}
ProviderFactory creates TTS providers based on type
func (*ProviderFactory) CreateProvider ¶
func (pf *ProviderFactory) CreateProvider(providerType string) (Provider, error)
CreateProvider creates a TTS provider based on the provider type
func (*ProviderFactory) GetAvailableProviders ¶
func (pf *ProviderFactory) GetAvailableProviders() []string
GetAvailableProviders returns list of available provider types
type ProviderType ¶
type ProviderType string
ProviderType represents the type of TTS provider.
const (
	// ProviderOpenAI is OpenAI's TTS API.
	ProviderOpenAI ProviderType = "openai"
	// ProviderElevenLabs is ElevenLabs TTS.
	ProviderElevenLabs ProviderType = "elevenlabs"
	// ProviderPiper is local Piper TTS.
	ProviderPiper ProviderType = "piper"
	// ProviderKokoro is local Kokoro TTS (legacy, Python-based).
	ProviderKokoro ProviderType = "kokoro"
	// ProviderSherpa is local TTS using sherpa-onnx (native Go, no Python).
	ProviderSherpa ProviderType = "sherpa"
	// ProviderEspeakNG is local eSpeak-NG TTS.
	ProviderEspeakNG ProviderType = "espeak-ng"
	// ProviderEdge is Microsoft Edge TTS.
	ProviderEdge ProviderType = "edge-tts"
	// ProviderMacOSNative is macOS native TTS using AVSpeechSynthesizer.
	ProviderMacOSNative ProviderType = "macos-native"
	// ProviderWindowsNative is Windows native TTS using WinRT SpeechSynthesis.
	ProviderWindowsNative ProviderType = "windows-native"
)
type Service ¶
type Service interface {
// Synthesize synthesizes text using the default provider.
Synthesize(ctx context.Context, req *SynthesizeRequest) (*SynthesizeResponse, error)
// SynthesizeWithProvider synthesizes text using a specific provider.
SynthesizeWithProvider(ctx context.Context, providerType ProviderType, req *SynthesizeRequest) (*SynthesizeResponse, error)
// SynthesizeStream synthesizes text with streaming audio output.
SynthesizeStream(ctx context.Context, req *SynthesizeRequest, callback StreamCallback) error
// ListVoices returns available voices from the default provider.
ListVoices(ctx context.Context) ([]Voice, error)
// ListProviders returns all available providers.
ListProviders() []ProviderType
// GetDefaultProvider returns the default provider type.
GetDefaultProvider() ProviderType
// SetDefaultProvider sets the default provider type.
SetDefaultProvider(providerType ProviderType) error
// GetProvider returns the provider instance for a given provider type.
GetProvider(providerType ProviderType) Provider
// GetConfig returns the current TTS configuration (speed, pitch, volume).
GetConfig() (speed, pitch, volume float32)
// SetConfig sets the TTS configuration (speed, pitch, volume).
SetConfig(speed, pitch, volume float32)
// GetVocoderStatus returns vocoder model status.
GetVocoderStatus() map[string]interface{}
// DownloadVocoderModel starts downloading the vocoder model.
DownloadVocoderModel(ctx context.Context) error
// CancelVocoderDownload cancels the vocoder download.
CancelVocoderDownload()
// GetKokoroStatus returns Kokoro model status.
GetKokoroStatus() map[string]interface{}
// DownloadKokoroModel starts downloading the Kokoro model.
DownloadKokoroModel(ctx context.Context) error
// CancelKokoroDownload cancels the Kokoro download.
CancelKokoroDownload()
// Close cleans up all provider resources.
Close()
}
Service defines the TTS service interface.
func NewService ¶
func NewService(cfg *ServiceConfig) (Service, error)
NewService creates a new TTS service.
type ServiceConfig ¶
type ServiceConfig struct {
DefaultProvider ProviderType
Providers []ProviderConfig
DataPath string // Path for model downloads
}
ServiceConfig holds the configuration for the TTS service.
type StreamCallback ¶
StreamCallback is called when streaming audio chunks are available.
type SynthesizeRequest ¶
type SynthesizeRequest struct {
// Text is the text to synthesize.
Text string
// Voice is the voice ID to use.
Voice string
// Format is the output audio format.
Format AudioFormat
// Speed is the speech speed (0.25-4.0, default 1.0).
Speed float32
// Pitch is the speech pitch adjustment.
Pitch float32
// Volume is the speech volume (0-200, default 100).
Volume float32
}
SynthesizeRequest represents a synthesis request.
type SynthesizeResponse ¶
type SynthesizeResponse struct {
// Audio is the synthesized audio data.
Audio io.ReadCloser
// Format is the audio format.
Format AudioFormat
// ContentType is the MIME content type.
ContentType string
// Duration is the estimated audio duration in seconds.
Duration float64
}
SynthesizeResponse represents a synthesis response.
type VocoderModelManager ¶
type VocoderModelManager struct{}
VocoderModelManager is a stub used when espeak is not compiled in.
func NewVocoderModelManager ¶
func NewVocoderModelManager(dataPath string) *VocoderModelManager
func (*VocoderModelManager) CancelDownload ¶
func (m *VocoderModelManager) CancelDownload()
func (*VocoderModelManager) DownloadModel ¶
func (m *VocoderModelManager) DownloadModel(ctx context.Context) error
func (*VocoderModelManager) GetModelStatus ¶
func (m *VocoderModelManager) GetModelStatus() map[string]interface{}
func (*VocoderModelManager) IsReady ¶
func (m *VocoderModelManager) IsReady() bool
type Voice ¶
type Voice struct {
// ID is the voice identifier.
ID string `json:"id"`
// Name is the display name.
Name string `json:"name"`
// Language is the voice language.
Language string `json:"language"`
// Gender is the voice gender.
Gender string `json:"gender,omitempty"`
// Description is the voice description.
Description string `json:"description,omitempty"`
// PreviewURL is a URL to preview the voice.
PreviewURL string `json:"preview_url,omitempty"`
// Provider is the TTS provider name (e.g., "Kokoro", "eSpeak-NG (Robotic)").
Provider string `json:"provider,omitempty"`
// Quality indicates voice quality level (e.g., "high", "medium", "low").
Quality string `json:"quality,omitempty"`
}
Voice represents a TTS voice.
func GetVoicesByLanguage ¶
GetVoicesByLanguage returns voices filtered by language prefix.
Source Files
¶
- audio_preprocessor.go
- consent_manager.go
- edge_tts.go
- espeak_adapter_stub.go
- espeak_model_manager_stub.go
- espeak_ng_stub.go
- g2p_en_espeak_stub.go
- handler.go
- kokoro_model_manager_stub.go
- kokoro_stub.go
- language_detect.go
- macos_native_stub.go
- provider_factory.go
- service.go
- types.go
- vocoder_model_manager_stub.go
- windows_native_check_stub.go