Documentation
¶
Index ¶
- Variables
- func ConvertAudioFormat(input []byte, outputFormat string) ([]byte, error)
- func DetectAudioFormat(data []byte) string
- func DownloadAudio(ctx context.Context, url string) ([]byte, error)
- func GetAudioDuration(audio []byte, format string) float64
- func GetAvailableVoices(provider, language string) []string
- func GetSupportedLanguages(provider string) []string
- func PlayAudio(audio []byte) error
- func ValidateAudio(audio []byte, maxSize int) error
- type AzureProvider
- func (p *AzureProvider) ASR(ctx context.Context, audio []byte) (string, error)
- func (p *AzureProvider) IsAvailable() bool
- func (p *AzureProvider) Name() string
- func (p *AzureProvider) StreamASR(ctx context.Context, audio <-chan []byte) (<-chan string, error)
- func (p *AzureProvider) StreamTTS(ctx context.Context, text string) (<-chan []byte, error)
- func (p *AzureProvider) TTS(ctx context.Context, text string) ([]byte, error)
- type ConversationSession
- type ElevenLabsProvider
- func (p *ElevenLabsProvider) ASR(ctx context.Context, audio []byte) (string, error)
- func (p *ElevenLabsProvider) IsAvailable() bool
- func (p *ElevenLabsProvider) Name() string
- func (p *ElevenLabsProvider) StreamASR(ctx context.Context, audio <-chan []byte) (<-chan string, error)
- func (p *ElevenLabsProvider) StreamTTS(ctx context.Context, text string) (<-chan []byte, error)
- func (p *ElevenLabsProvider) TTS(ctx context.Context, text string) ([]byte, error)
- type GoogleTTSProvider
- func (p *GoogleTTSProvider) ASR(ctx context.Context, audio []byte) (string, error)
- func (p *GoogleTTSProvider) IsAvailable() bool
- func (p *GoogleTTSProvider) Name() string
- func (p *GoogleTTSProvider) StreamASR(ctx context.Context, audio <-chan []byte) (<-chan string, error)
- func (p *GoogleTTSProvider) StreamTTS(ctx context.Context, text string) (<-chan []byte, error)
- func (p *GoogleTTSProvider) TTS(ctx context.Context, text string) ([]byte, error)
- type Manager
- func (m *Manager) ASR(ctx context.Context, audio []byte) (*VoiceResult, error)
- func (m *Manager) ASRWithProvider(ctx context.Context, providerName string, audio []byte) (*VoiceResult, error)
- func (m *Manager) Base64Decode(data string) ([]byte, error)
- func (m *Manager) Base64Encode(audio []byte) string
- func (m *Manager) ConvertAudioFormat(audio []byte, format string) ([]byte, error)
- func (m *Manager) DownloadAudio(ctx context.Context, url string) ([]byte, error)
- func (m *Manager) GenerateVoiceResponse(ctx context.Context, session *ConversationSession, text string) ([]byte, error)
- func (m *Manager) GetActiveProvider() VoiceProvider
- func (m *Manager) GetAvailableProviders() []string
- func (m *Manager) GetAvailableVoices(provider, language string) []string
- func (m *Manager) GetConfig() *VoiceConfig
- func (m *Manager) GetPresets() []VoicePreset
- func (m *Manager) GetProvider(name string) (VoiceProvider, error)
- func (m *Manager) GetProviders() []string
- func (m *Manager) GetSupportedLanguages(provider string) []string
- func (m *Manager) LoadAudio(path string) ([]byte, error)
- func (m *Manager) PlayAudio(audio []byte) error
- func (m *Manager) ProcessVoiceInput(ctx context.Context, session *ConversationSession, audio []byte) (string, error)
- func (m *Manager) SaveAudio(audio []byte, path string) error
- func (m *Manager) SetProvider(name string) error
- func (m *Manager) StartSession(ctx context.Context, language string) (*ConversationSession, error)
- func (m *Manager) StreamASR(ctx context.Context, audio <-chan []byte) (<-chan string, error)
- func (m *Manager) StreamTTS(ctx context.Context, text string) (<-chan *VoiceResult, error)
- func (m *Manager) TTS(ctx context.Context, text string) (*VoiceResult, error)
- func (m *Manager) TTSWithProvider(ctx context.Context, providerName, text string) (*VoiceResult, error)
- func (m *Manager) UpdateConfig(config *VoiceConfig) error
- type OpenAIProvider
- func (p *OpenAIProvider) ASR(ctx context.Context, audio []byte) (string, error)
- func (p *OpenAIProvider) IsAvailable() bool
- func (p *OpenAIProvider) Name() string
- func (p *OpenAIProvider) StreamASR(ctx context.Context, audio <-chan []byte) (<-chan string, error)
- func (p *OpenAIProvider) StreamTTS(ctx context.Context, text string) (<-chan []byte, error)
- func (p *OpenAIProvider) TTS(ctx context.Context, text string) ([]byte, error)
- type ProviderConfig
- type ProviderCredentials
- type VoiceConfig
- type VoiceMessage
- type VoicePreset
- type VoiceProvider
- type VoiceResult
- type WhisperProvider
- func (p *WhisperProvider) ASR(ctx context.Context, audio []byte) (string, error)
- func (p *WhisperProvider) IsAvailable() bool
- func (p *WhisperProvider) Name() string
- func (p *WhisperProvider) StreamASR(ctx context.Context, audio <-chan []byte) (<-chan string, error)
- func (p *WhisperProvider) StreamTTS(ctx context.Context, text string) (<-chan []byte, error)
- func (p *WhisperProvider) TTS(ctx context.Context, text string) ([]byte, error)
Constants ¶
This section is empty.
Variables ¶
var HTTPClient = &http.Client{ Timeout: 60 * time.Second, }
HTTPClient is the HTTP client used for API requests
Functions ¶
func ConvertAudioFormat ¶ added in v0.3.0
ConvertAudioFormat converts audio to a different format using ffmpeg.
func DetectAudioFormat ¶ added in v0.3.0
DetectAudioFormat detects audio format from data
func DownloadAudio ¶ added in v0.3.0
DownloadAudio downloads audio from a URL.
func GetAudioDuration ¶ added in v0.3.0
GetAudioDuration estimates audio duration in seconds
func GetAvailableVoices ¶ added in v0.3.0
GetAvailableVoices returns available voices for a provider and language
func GetSupportedLanguages ¶ added in v0.3.0
GetSupportedLanguages returns supported languages for a provider
func ValidateAudio ¶ added in v0.3.0
ValidateAudio validates audio data
Types ¶
type AzureProvider ¶
type AzureProvider struct {
// contains filtered or unexported fields
}
AzureProvider implements Azure Speech Services. Docs: https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/
func NewAzureProvider ¶ added in v0.3.0
func NewAzureProvider(config *ProviderConfig) *AzureProvider
NewAzureProvider creates a new Azure Speech provider
func (*AzureProvider) IsAvailable ¶
func (p *AzureProvider) IsAvailable() bool
func (*AzureProvider) Name ¶
func (p *AzureProvider) Name() string
type ConversationSession ¶
type ConversationSession struct {
ID string
StartTime time.Time
Messages []VoiceMessage
Language string
IsActive bool
}
ConversationSession represents a voice conversation session
type ElevenLabsProvider ¶
type ElevenLabsProvider struct {
// contains filtered or unexported fields
}
ElevenLabsProvider implements ElevenLabs TTS. Docs: https://elevenlabs.io/docs/api-reference/text-to-speech
func NewElevenLabsProvider ¶ added in v0.3.0
func NewElevenLabsProvider(config *ProviderConfig) *ElevenLabsProvider
NewElevenLabsProvider creates a new ElevenLabs TTS provider
func (*ElevenLabsProvider) IsAvailable ¶
func (p *ElevenLabsProvider) IsAvailable() bool
func (*ElevenLabsProvider) Name ¶
func (p *ElevenLabsProvider) Name() string
type GoogleTTSProvider ¶ added in v0.3.0
type GoogleTTSProvider struct {
// contains filtered or unexported fields
}
GoogleTTSProvider implements Google Cloud Text-to-Speech. Docs: https://cloud.google.com/text-to-speech/docs
func NewGoogleTTSProvider ¶ added in v0.3.0
func NewGoogleTTSProvider(config *ProviderConfig) *GoogleTTSProvider
NewGoogleTTSProvider creates a new Google TTS provider
func (*GoogleTTSProvider) IsAvailable ¶ added in v0.3.0
func (p *GoogleTTSProvider) IsAvailable() bool
func (*GoogleTTSProvider) Name ¶ added in v0.3.0
func (p *GoogleTTSProvider) Name() string
type Manager ¶
type Manager struct {
// contains filtered or unexported fields
}
Manager manages voice operations
func NewManager ¶
func NewManager(config *VoiceConfig) *Manager
NewManager creates a new voice manager
func (*Manager) ASRWithProvider ¶ added in v0.3.0
func (m *Manager) ASRWithProvider(ctx context.Context, providerName string, audio []byte) (*VoiceResult, error)
ASRWithProvider converts speech to text using a specific provider
func (*Manager) Base64Decode ¶
Base64Decode decodes base64 to audio
func (*Manager) Base64Encode ¶
Base64Encode encodes audio to base64
func (*Manager) ConvertAudioFormat ¶ added in v0.3.0
ConvertAudioFormat converts audio to a different format.
func (*Manager) DownloadAudio ¶ added in v0.3.0
DownloadAudio downloads audio from a URL.
func (*Manager) GenerateVoiceResponse ¶
func (m *Manager) GenerateVoiceResponse(ctx context.Context, session *ConversationSession, text string) ([]byte, error)
GenerateVoiceResponse generates voice response for text
func (*Manager) GetActiveProvider ¶ added in v0.3.0
func (m *Manager) GetActiveProvider() VoiceProvider
GetActiveProvider returns the currently active provider
func (*Manager) GetAvailableProviders ¶ added in v0.3.0
GetAvailableProviders returns the list of providers that are currently available (have API keys).
func (*Manager) GetAvailableVoices ¶ added in v0.3.0
GetAvailableVoices returns available voices for a provider and language
func (*Manager) GetConfig ¶ added in v0.3.0
func (m *Manager) GetConfig() *VoiceConfig
GetConfig returns the current voice configuration
func (*Manager) GetPresets ¶
func (m *Manager) GetPresets() []VoicePreset
GetPresets returns built-in voice presets
func (*Manager) GetProvider ¶ added in v0.3.0
func (m *Manager) GetProvider(name string) (VoiceProvider, error)
GetProvider returns a specific provider by name
func (*Manager) GetProviders ¶
GetProviders returns list of available providers
func (*Manager) GetSupportedLanguages ¶ added in v0.3.0
GetSupportedLanguages returns supported languages for a provider
func (*Manager) ProcessVoiceInput ¶
func (m *Manager) ProcessVoiceInput(ctx context.Context, session *ConversationSession, audio []byte) (string, error)
ProcessVoiceInput processes voice input and returns text
func (*Manager) SetProvider ¶
SetProvider sets the active voice provider
func (*Manager) StartSession ¶
StartSession starts a new voice conversation
func (*Manager) TTSWithProvider ¶ added in v0.3.0
func (m *Manager) TTSWithProvider(ctx context.Context, providerName, text string) (*VoiceResult, error)
TTSWithProvider converts text to speech using a specific provider
func (*Manager) UpdateConfig ¶ added in v0.3.0
func (m *Manager) UpdateConfig(config *VoiceConfig) error
UpdateConfig updates the voice configuration
type OpenAIProvider ¶
type OpenAIProvider struct {
// contains filtered or unexported fields
}
OpenAIProvider implements OpenAI TTS. Docs: https://platform.openai.com/docs/guides/text-to-speech
func NewOpenAIProvider ¶ added in v0.3.0
func NewOpenAIProvider(config *ProviderConfig) *OpenAIProvider
NewOpenAIProvider creates a new OpenAI TTS provider
func (*OpenAIProvider) IsAvailable ¶
func (p *OpenAIProvider) IsAvailable() bool
func (*OpenAIProvider) Name ¶
func (p *OpenAIProvider) Name() string
type ProviderConfig ¶ added in v0.3.0
type ProviderConfig struct {
APIKey string
APIURL string
Region string
Model string
Voice string
Language string
Speed float64
Pitch float64
}
ProviderConfig holds configuration for voice providers
type ProviderCredentials ¶ added in v0.3.0
type ProviderCredentials struct {
APIKey string `json:"api_key"`
APIURL string `json:"api_url"`
Region string `json:"region"`
}
ProviderCredentials holds credentials for a specific provider
type VoiceConfig ¶
type VoiceConfig struct {
Provider string `json:"provider"` // openai, elevenlabs, azure, google, whisper
Model string `json:"model"` // TTS/ASR model
Voice string `json:"voice"` // Voice ID
Language string `json:"language"` // Language code (en, zh, etc.)
Speed float64 `json:"speed"` // Speech speed (0.5-2.0)
Pitch float64 `json:"pitch"` // Voice pitch (-20 to 20)
APIKey string `json:"api_key"` // API key
APIURL string `json:"api_url"` // Custom API URL
Region string `json:"region"` // Azure region
Latency string `json:"latency"` // ultra_low, low, medium
Instructions string `json:"instructions"` // Voice instructions
Providers map[string]ProviderCredentials `json:"providers"` // Provider-specific credentials
}
VoiceConfig holds voice configuration
func DefaultVoiceConfig ¶
func DefaultVoiceConfig() *VoiceConfig
DefaultVoiceConfig returns a default voice configuration
type VoiceMessage ¶
type VoiceMessage struct {
Role string // user, assistant
Text string // transcribed text
Audio []byte // audio data
Timestamp time.Time
}
VoiceMessage represents a voice message in a conversation
type VoicePreset ¶
type VoicePreset struct {
Name string `json:"name"`
Provider string `json:"provider"`
Model string `json:"model"`
VoiceID string `json:"voice_id"`
Speed float64 `json:"speed"`
Pitch float64 `json:"pitch"`
Instructions string `json:"instructions"`
Language string `json:"language"`
}
VoicePreset represents a voice preset
type VoiceProvider ¶
type VoiceProvider interface {
// Name returns the provider name
Name() string
// IsAvailable checks if the provider is available
IsAvailable() bool
// TTS converts text to speech
TTS(ctx context.Context, text string) ([]byte, error)
// ASR converts speech to text
ASR(ctx context.Context, audio []byte) (string, error)
// StreamTTS streams text to speech
StreamTTS(ctx context.Context, text string) (<-chan []byte, error)
// StreamASR streams speech to text
StreamASR(ctx context.Context, audio <-chan []byte) (<-chan string, error)
}
VoiceProvider defines the interface for TTS/ASR providers
type VoiceResult ¶
type VoiceResult struct {
Audio []byte // Audio data
Text string // Text content
Duration float64 // Duration in seconds
Format string // Audio format (mp3, wav, ogg)
SampleRate int // Sample rate
Provider string // Provider used
}
VoiceResult represents a voice operation result
type WhisperProvider ¶
type WhisperProvider struct {
// contains filtered or unexported fields
}
WhisperProvider implements OpenAI Whisper for ASR only. This is a specialized provider for local Whisper installations.
func NewWhisperProvider ¶ added in v0.3.0
func NewWhisperProvider(config *ProviderConfig) *WhisperProvider
NewWhisperProvider creates a new Whisper ASR provider
func (*WhisperProvider) IsAvailable ¶
func (p *WhisperProvider) IsAvailable() bool
func (*WhisperProvider) Name ¶
func (p *WhisperProvider) Name() string