synthesizer

package

v1.2.0 (Latest) · Published: Jun 6, 2026 · License: MIT · Imports: 38 · Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/LingByte/lingllm

Links

Open Source Insights

Documentation ¶

Index ¶

Constants
func CheckLocalTTSAvailable() []string
func ComputeSampleByteCount(sampleRate, bitDepth, channels int) int
func DetectLocalTTSCommand() string
func GetAzureVoices() map[string]string
func GetLocalTTSInfo() map[string]interface{}
func NormalizeFramePeriod(d string) time.Duration
func SetGlobalSynthesisFactory(factory SynthesisFactory)
func StripEmoji(text string) string
func WithSynthesis(svc AudioSynthesisEngine) media.MediaHandlerFunc
type AmazonService
- func NewAmazonService(opt AmazonTTSConfig) *AmazonService
- func (as *AmazonService) CacheKey(text string) string
- func (as *AmazonService) Close() error
- func (as *AmazonService) Format() media.StreamFormat
- func (as *AmazonService) Provider() TTSProvider
- func (as *AmazonService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error
type AmazonTTSConfig
- func NewAmazonTTSOption(region string, outputFormat types.OutputFormat, voiceId types.VoiceId) AmazonTTSConfig
- func (c *AmazonTTSConfig) GetProvider() TTSProvider
- func (opt *AmazonTTSConfig) String() string
type AudioAudioSynthesisPlayerRequest
type AudioSynthesisEngine
- func NewAudioSynthesisEngine(name string, options map[string]any) (AudioSynthesisEngine, error)
- func NewAudioSynthesisEngineFromCredential(config TTSCredentialConfig) (AudioSynthesisEngine, error)
type AudioSynthesisHandler
type AudioSynthesisPlayer
- func NewAudioSynthesisPlayer(vendor string, format media.StreamFormat) *AudioSynthesisPlayer
- func (player *AudioSynthesisPlayer) Close()
- func (player *AudioSynthesisPlayer) Emit(h media.MediaHandler, audioPacket *media.AudioPacket, inputRate int)
- func (player *AudioSynthesisPlayer) EmitStopPlayState(h media.MediaHandler, duration string, playId string, sequence int, ...)
- func (player *AudioSynthesisPlayer) Interrupt(h media.MediaHandler, reason string)
- func (player *AudioSynthesisPlayer) Run(handler media.MediaHandler, ctx context.Context)
type AudioSynthesisRequest
- func (req *AudioSynthesisRequest) OnMessage(data []byte)
- func (req *AudioSynthesisRequest) OnTimestamp(timestamp SentenceTimestamp)
type AzureConfig
- func NewAzureConfig(subscriptionKey, region string) AzureConfig
- func (c *AzureConfig) GetProvider() TTSProvider
type AzureRequest
type AzureService
- func NewAzureService(opt AzureConfig) *AzureService
- func (as *AzureService) CacheKey(text string) string
- func (as *AzureService) Close() error
- func (as *AzureService) Format() media.StreamFormat
- func (as *AzureService) Provider() TTSProvider
- func (as *AzureService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error
type BaiduTTSConfig
- func NewBaiduTTSOption(token string) BaiduTTSConfig
- func (c *BaiduTTSConfig) GetProvider() TTSProvider
- func (opt *BaiduTTSConfig) String() string
type BaiduTTSService
- func NewBaiduService(opt BaiduTTSConfig) *BaiduTTSService
- func (bs *BaiduTTSService) CacheKey(text string) string
- func (bs *BaiduTTSService) Close() error
- func (bs *BaiduTTSService) DoubleURLEncode(text string) string
- func (bs *BaiduTTSService) Format() media.StreamFormat
- func (bs *BaiduTTSService) Provider() TTSProvider
- func (bs *BaiduTTSService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error
type CoquiResponse
type CoquiService
- func NewCoquiService(opt CoquiTTSOption) *CoquiService
- func (c *CoquiService) CacheKey(text string) string
- func (c *CoquiService) Close() error
- func (c *CoquiService) Format() media.StreamFormat
- func (c *CoquiService) Provider() TTSProvider
- func (c *CoquiService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error
type CoquiTTSOption
- func NewCoquiTTSOption(url string) CoquiTTSOption
- func (c *CoquiTTSOption) GetProvider() TTSProvider
- func (opt *CoquiTTSOption) String() string
type DefaultSynthesisFactory
- func NewSynthesisFactory() *DefaultSynthesisFactory
- func (f *DefaultSynthesisFactory) CreateEngine(config SynthesisConfig) (AudioSynthesisEngine, error)
- func (f *DefaultSynthesisFactory) GetSupportedProviders() []TTSProvider
- func (f *DefaultSynthesisFactory) IsProviderSupported(provider TTSProvider) bool
- func (f *DefaultSynthesisFactory) RegisterCreator(provider TTSProvider, ...)
type ElevenLabsConfig
- func NewElevenLabsConfig(apiKey, voiceID string) ElevenLabsConfig
- func (c *ElevenLabsConfig) GetProvider() TTSProvider
type ElevenLabsRequest
type ElevenLabsService
- func NewElevenLabsService(opt ElevenLabsConfig) *ElevenLabsService
- func (es *ElevenLabsService) CacheKey(text string) string
- func (es *ElevenLabsService) Close() error
- func (es *ElevenLabsService) Format() media.StreamFormat
- func (es *ElevenLabsService) Provider() TTSProvider
- func (es *ElevenLabsService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error
type ElevenLabsVoiceSettings
type FishAudioConfig
- func NewFishAudioConfig(apiKey, referenceID string) FishAudioConfig
- func (c *FishAudioConfig) GetProvider() TTSProvider
type FishAudioListModelsResponse
type FishAudioRequest
type FishAudioService
- func NewFishAudioService(opt FishAudioConfig) *FishAudioService
- func (fa *FishAudioService) CacheKey(text string) string
- func (fa *FishAudioService) Close() error
- func (fa *FishAudioService) Format() media.StreamFormat
- func (fa *FishAudioService) Provider() TTSProvider
- func (fa *FishAudioService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error
type FishAudioVoiceOption
- func GetFishAudioVoices(apiKey string) ([]FishAudioVoiceOption, error)
type FishSpeechConfig
- func NewFishSpeechConfig(apiKey, referenceID string) FishSpeechConfig
- func (c *FishSpeechConfig) GetProvider() TTSProvider
type FishSpeechListModelsRequest
type FishSpeechListModelsResponse
type FishSpeechService
- func NewFishSpeechService(opt FishSpeechConfig) *FishSpeechService
- func (fs *FishSpeechService) CacheKey(text string) string
- func (fs *FishSpeechService) Close() error
- func (fs *FishSpeechService) Format() media.StreamFormat
- func (fs *FishSpeechService) Provider() TTSProvider
- func (fs *FishSpeechService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error
type FishSpeechV2Request
type FishSpeechV2Response
type FishSpeechVoiceOption
- func GetFishSpeechVoices(apiKey string) ([]FishSpeechVoiceOption, error)
type GoogleService
- func NewGoogleService(opt GoogleTTSOption) *GoogleService
- func (gs *GoogleService) CacheKey(text string) string
- func (gs *GoogleService) Close() error
- func (gs *GoogleService) Format() media.StreamFormat
- func (gs *GoogleService) Provider() TTSProvider
- func (gs *GoogleService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error
type GoogleTTSOption
- func NewGoogleTTSOption(languageCode string) GoogleTTSOption
- func (c *GoogleTTSOption) GetProvider() TTSProvider
- func (opt *GoogleTTSOption) String() string
type LocalGoSpeechConfig
- func NewLocalGoSpeechConfig(provider LocalGoSpeechProvider, modelPath string) *LocalGoSpeechConfig
- func (c *LocalGoSpeechConfig) GetProvider() TTSProvider
type LocalGoSpeechProvider
type LocalGoSpeechService
- func NewLocalGoSpeechService(config *LocalGoSpeechConfig) (*LocalGoSpeechService, error)
- func (s *LocalGoSpeechService) CacheKey(text string) string
- func (s *LocalGoSpeechService) Close() error
- func (s *LocalGoSpeechService) Format() media.StreamFormat
- func (s *LocalGoSpeechService) GetConfig() *LocalGoSpeechConfig
- func (s *LocalGoSpeechService) GetSupportedLanguages() []string
- func (s *LocalGoSpeechService) GetSupportedSpeakers() []string
- func (s *LocalGoSpeechService) IsReady() bool
- func (s *LocalGoSpeechService) Provider() TTSProvider
- func (s *LocalGoSpeechService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error
- func (s *LocalGoSpeechService) UpdateConfig(config *LocalGoSpeechConfig) error
type LocalService
- func NewLocalService(opt LocalTTSConfig) *LocalService
- func (ls *LocalService) CacheKey(text string) string
- func (ls *LocalService) Close() error
- func (ls *LocalService) Format() media.StreamFormat
- func (ls *LocalService) Provider() TTSProvider
- func (ls *LocalService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error
type LocalTTSConfig
- func NewLocalTTSConfig(command string) LocalTTSConfig
- func (c *LocalTTSConfig) GetProvider() TTSProvider
type MinimaxAudioSetting
type MinimaxConnectionResponse
type MinimaxOption
- func NewMinimaxOption(apiKey string) MinimaxOption
- func (c *MinimaxOption) GetProvider() TTSProvider
- func (opt *MinimaxOption) String() string
type MinimaxPronunciationDict
type MinimaxService
- func NewMinimaxService(opt MinimaxOption) *MinimaxService
- func (ms *MinimaxService) CacheKey(text string) string
- func (ms *MinimaxService) Close() error
- func (ms *MinimaxService) Format() media.StreamFormat
- func (ms *MinimaxService) GetConnSessionID() string
- func (ms *MinimaxService) GetTraceID() string
- func (ms *MinimaxService) Provider() TTSProvider
- func (ms *MinimaxService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error
type MinimaxTaskContinueResponse
type MinimaxTaskStartRequest
type MinimaxTaskStartResponse
type MinimaxTimbreWeight
type MinimaxVoiceSetting
type OpenAIConfig
- func NewOpenAIConfig(apiKey string) OpenAIConfig
- func (c *OpenAIConfig) GetProvider() TTSProvider
type OpenAIRequest
type OpenAIService
- func NewOpenAIService(opt OpenAIConfig) *OpenAIService
- func (os *OpenAIService) CacheKey(text string) string
- func (os *OpenAIService) Close() error
- func (os *OpenAIService) Format() media.StreamFormat
- func (os *OpenAIService) Provider() TTSProvider
- func (os *OpenAIService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error
type PlayRecord
type QCloudService
- func NewQCloudService(opt QCloudTTSConfig) *QCloudService
- func (qs *QCloudService) CacheKey(text string) string
- func (qs *QCloudService) Close() error
- func (qs *QCloudService) Format() media.StreamFormat
- func (qs *QCloudService) Provider() TTSProvider
- func (qs *QCloudService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error
type QCloudTTSConfig
- func NewQcloudTTSConfig(appId string, secretId string, secretKey string, voiceType int64, codec string, ...) QCloudTTSConfig
- func (c *QCloudTTSConfig) GetProvider() TTSProvider
- func (opt *QCloudTTSConfig) ToString() string
type QiniuService
- func NewQiniuService(opt QiniuTTSConfig) *QiniuService
- func (qs *QiniuService) CacheKey(text string) string
- func (qs *QiniuService) Close() error
- func (qs *QiniuService) Format() media.StreamFormat
- func (qs *QiniuService) Provider() TTSProvider
- func (qs *QiniuService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error
type QiniuTTSConfig
- func NewQiniuTTSConfig(apiKey, baseURL string) QiniuTTSConfig
- func (c *QiniuTTSConfig) GetProvider() TTSProvider
type QiniuTTSRequest
type QiniuTTSResponse
type SentenceTimestamp
type SynthesisBuffer
- func (s *SynthesisBuffer) OnMessage(data []byte)
- func (s *SynthesisBuffer) OnTimestamp(timestamp SentenceTimestamp)
type SynthesisConfig
type SynthesisFactory
- func GetGlobalSynthesisFactory() SynthesisFactory
type TTSAddition
type TTSAudio
type TTSCredentialConfig
type TTSProvider
- func (tp TTSProvider) ToString() string
type TTSRequestData
type VolcAddition
type VolcengineService
- func NewVolcengineService(opt VolcengineTTSOption) *VolcengineService
- func (v *VolcengineService) CacheKey(text string) string
- func (v *VolcengineService) Close() error
- func (v *VolcengineService) Format() media.StreamFormat
- func (v *VolcengineService) Provider() TTSProvider
- func (v *VolcengineService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error
type VolcengineTTSOption
- func NewVolcengineTTSOption(appID, accessToken, cluster string) VolcengineTTSOption
- func (c *VolcengineTTSOption) GetProvider() TTSProvider
type VolcengineTTSServResponse
type WSAudio
type WSHeader
type WSParameter
type WSPayload
type WSRequest
type WSTTS
type Word
type XunfeiService
- func NewXunfeiService(opt XunfeiTTSConfig) *XunfeiService
- func (xs *XunfeiService) CacheKey(text string) string
- func (xs *XunfeiService) Close() error
- func (xs *XunfeiService) Format() media.StreamFormat
- func (xs *XunfeiService) Provider() TTSProvider
- func (xs *XunfeiService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error
type XunfeiTTSConfig
- func NewXunfeiTTSConfig(appID, apiKey, apiSecret string) XunfeiTTSConfig
- func (c *XunfeiTTSConfig) GetProvider() TTSProvider

Constants ¶

View Source

// Minimax constants: MinimaxWebSocketURL is the WebSocket address of the
// Minimax t2a_v2 endpoint, and MinimaxSpeech25TurboPreview is the model name
// string "speech-2.5-turbo-preview".
const (
	MinimaxWebSocketURL         = "wss://api.minimaxi.com/ws/v1/t2a_v2"
	MinimaxSpeech25TurboPreview = "speech-2.5-turbo-preview"
)

View Source

// TTS backend identifiers. Each value is an untyped string constant of the
// form "tts.<backend>"; there is one entry per supported backend, plus several
// Volcengine variants (clone, llm, stream).
// NOTE(review): presumably these are the names accepted by
// NewAudioSynthesisEngine — confirm against its implementation.
const (
	TTS_QCLOUD            = "tts.qcloud"
	TTS_XUNFEI            = "tts.xunfei"
	TTS_QINIU             = "tts.qiniu"
	TTS_BAIDU             = "tts.baidu"
	TTS_GOOGLE            = "tts.google"
	TTS_AWS               = "tts.aws"
	TTS_AZURE             = "tts.azure"
	TTS_OPENAI            = "tts.openai"
	TTS_ELEVENLABS        = "tts.elevenlabs"
	TTS_LOCAL             = "tts.local"
	TTS_LOCAL_GOSPEECH    = "tts.local_gospeech"
	TTS_FISHSPEECH        = "tts.fishspeech"
	TTS_FISHAUDIO         = "tts.fishaudio"
	TTS_COQUI             = "tts.coqui"
	TTS_VOLCENGINE        = "tts.volcengine"
	TTS_VOLCENGINE_CLONE  = "tts.volcengine_clone"
	TTS_VOLCENGINE_LLM    = "tts.volcengine_llm"
	TTS_VOLCENGINE_STREAM = "tts.volcengine_stream"
	TTS_MINIMAX           = "tts.minimax"
)

View Source

// SsmlSpeak is the literal opening SSML tag "<speak>".
// VolcengineCloneCluster and VolcengineLLMCluster are the cluster name strings
// "volcano_icl" and "volcano_tts".
// NOTE(review): presumably these are the values passed as the cluster argument
// of NewVolcengineTTSOption for the clone and LLM variants — confirm.
const (
	SsmlSpeak = "<speak>"

	VolcengineCloneCluster = "volcano_icl"
	VolcengineLLMCluster   = "volcano_tts"
)

Variables ¶

This section is empty.

Functions ¶

func CheckLocalTTSAvailable ¶

func CheckLocalTTSAvailable() []string

CheckLocalTTSAvailable 检查本地是否安装了 TTS 工具

func ComputeSampleByteCount ¶

func ComputeSampleByteCount(sampleRate, bitDepth, channels int) int

ComputeSampleByteCount computes the number of bytes for audio samples based on sample rate, bit depth, and number of channels. Formula: (sampleRate * bitDepth * channels) / 8

func DetectLocalTTSCommand ¶

func DetectLocalTTSCommand() string

DetectLocalTTSCommand 自动检测可用的本地 TTS 命令

func GetAzureVoices ¶

func GetAzureVoices() map[string]string

GetAzureVoices 获取可用的 Azure 音色列表（示例）

func GetLocalTTSInfo ¶

func GetLocalTTSInfo() map[string]interface{}

GetLocalTTSInfo 获取本地 TTS 信息

func NormalizeFramePeriod ¶

func NormalizeFramePeriod(d string) time.Duration

NormalizeFramePeriod validates and normalizes the duration string d, using explicit bounds checking, and returns the result as a time.Duration.

func SetGlobalSynthesisFactory ¶

func SetGlobalSynthesisFactory(factory SynthesisFactory)

SetGlobalSynthesisFactory 设置全局TTS工厂实例

func StripEmoji ¶

func StripEmoji(text string) string

func WithSynthesis ¶

func WithSynthesis(svc AudioSynthesisEngine) media.MediaHandlerFunc

Types ¶

type AmazonService ¶

type AmazonService struct {
	// contains filtered or unexported fields
}

func NewAmazonService ¶

func NewAmazonService(opt AmazonTTSConfig) *AmazonService

func (*AmazonService) CacheKey ¶

func (as *AmazonService) CacheKey(text string) string

func (*AmazonService) Close ¶

func (as *AmazonService) Close() error

func (*AmazonService) Format ¶

func (as *AmazonService) Format() media.StreamFormat

func (*AmazonService) Provider ¶

func (as *AmazonService) Provider() TTSProvider

func (*AmazonService) Synthesize ¶

func (as *AmazonService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error

type AmazonTTSConfig ¶

// AmazonTTSConfig is the option type accepted by NewAmazonService. It carries
// the region, the output format and voice ID (both typed by the imported
// `types` package), and the audio parameters: sample rate, channel count, bit
// depth and frame duration.
// NOTE(review): the `env` and `default` tags are presumably read by a
// configuration loader that is not visible here — confirm before relying on
// the listed defaults.
type AmazonTTSConfig struct {
	SampleRate    int                `json:"sampleRate" env:"sample_rate" default:"16000"`
	Region        string             `json:"region"`
	OutputFormat  types.OutputFormat `json:"outputFormat" env:"output_format" default:"pcm"`
	VoiceId       types.VoiceId      `json:"voiceId" env:"voice_id"`
	Channels      int                `json:"channels" env:"channels" default:"1"`
	BitDepth      int                `json:"bitDepth" env:"bit_depth" default:"16"`
	FrameDuration string             `json:"frameDuration" env:"frame_duration" default:"20ms"`
}

func NewAmazonTTSOption ¶

func NewAmazonTTSOption(region string, outputFormat types.OutputFormat, voiceId types.VoiceId) AmazonTTSConfig

func (*AmazonTTSConfig) GetProvider ¶

func (c *AmazonTTSConfig) GetProvider() TTSProvider

GetProvider returns the TTS provider type

func (*AmazonTTSConfig) String ¶

func (opt *AmazonTTSConfig) String() string

type AudioAudioSynthesisPlayerRequest ¶

type AudioAudioSynthesisPlayerRequest struct {
	// contains filtered or unexported fields
}

type AudioSynthesisEngine ¶

// AudioSynthesisEngine is the core interface for TTS (text-to-speech)
// synthesis. Every *Service type listed in this package exposes this method set.
type AudioSynthesisEngine interface {
	// Provider returns the TTSProvider value identifying the backend.
	Provider() TTSProvider
	// Format returns the media.StreamFormat associated with this engine.
	Format() media.StreamFormat
	// CacheKey returns a string key derived for the given text.
	// NOTE(review): how the key is built is implementation-specific — confirm
	// per service before using it across engines.
	CacheKey(text string) string
	// Synthesize runs synthesis for text, reporting results through handler
	// and returning an error on failure. ctx is the request context.
	Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error
	// Close releases the engine and returns any error from doing so.
	Close() error
}

AudioSynthesisEngine is the core interface for TTS (Text-to-Speech) synthesis.

func NewAudioSynthesisEngine ¶

func NewAudioSynthesisEngine(name string, options map[string]any) (AudioSynthesisEngine, error)

func NewAudioSynthesisEngineFromCredential ¶

func NewAudioSynthesisEngineFromCredential(config TTSCredentialConfig) (AudioSynthesisEngine, error)

NewAudioSynthesisEngineFromCredential 根据凭证配置创建TTS服务

type AudioSynthesisHandler ¶

// AudioSynthesisHandler is the callback interface for TTS synthesis events.
// It is the handler argument of AudioSynthesisEngine.Synthesize.
type AudioSynthesisHandler interface {
	// OnMessage receives a chunk of bytes produced during synthesis
	// (presumably encoded audio — confirm against the engine in use).
	OnMessage([]byte)
	// OnTimestamp receives a SentenceTimestamp produced during synthesis.
	OnTimestamp(timestamp SentenceTimestamp)
}

AudioSynthesisHandler is the callback interface for TTS synthesis events.

type AudioSynthesisPlayer ¶

type AudioSynthesisPlayer struct {
	SenderName string
	Format     media.StreamFormat
	// contains filtered or unexported fields
}

func NewAudioSynthesisPlayer ¶

func NewAudioSynthesisPlayer(vendor string, format media.StreamFormat) *AudioSynthesisPlayer

func (*AudioSynthesisPlayer) Close ¶

func (player *AudioSynthesisPlayer) Close()

func (*AudioSynthesisPlayer) Emit ¶

func (player *AudioSynthesisPlayer) Emit(h media.MediaHandler, audioPacket *media.AudioPacket, inputRate int)

func (*AudioSynthesisPlayer) EmitStopPlayState ¶

func (player *AudioSynthesisPlayer) EmitStopPlayState(h media.MediaHandler, duration string, playId string, sequence int, reason string, sourceText string)

func (*AudioSynthesisPlayer) Interrupt ¶

func (player *AudioSynthesisPlayer) Interrupt(h media.MediaHandler, reason string)

func (*AudioSynthesisPlayer) Run ¶

func (player *AudioSynthesisPlayer) Run(handler media.MediaHandler, ctx context.Context)

type AudioSynthesisRequest ¶

type AudioSynthesisRequest struct {
	PlayID string
	// contains filtered or unexported fields
}

func (*AudioSynthesisRequest) OnMessage ¶

func (req *AudioSynthesisRequest) OnMessage(data []byte)

func (*AudioSynthesisRequest) OnTimestamp ¶

func (req *AudioSynthesisRequest) OnTimestamp(timestamp SentenceTimestamp)

type AzureConfig ¶

// AzureConfig is the Azure TTS configuration accepted by NewAzureService.
// It holds the credentials (subscription key and region), the voice and
// language selection, the audio parameters, a timeout and an optional base URL.
// NOTE(review): the SampleRate default (22050) does not match the 24 kHz rate
// named in the Codec default ("audio-24khz-48kbitrate-mono-mp3") — confirm
// which one the service actually uses.
// NOTE(review): Timeout has no unit in its declaration; presumably seconds —
// confirm.
type AzureConfig struct {
	SubscriptionKey string `json:"subscription_key" yaml:"subscription_key" env:"AZURE_SUBSCRIPTION_KEY"`
	Region          string `json:"region" yaml:"region" env:"AZURE_REGION"`
	Voice           string `json:"voice" yaml:"voice" default:"zh-CN-XiaoxiaoNeural"`
	Language        string `json:"language" yaml:"language"` // language code, used for the SSML xml:lang attribute
	SampleRate      int    `json:"sample_rate" yaml:"sample_rate" default:"22050"`
	Channels        int    `json:"channels" yaml:"channels" default:"1"`
	BitDepth        int    `json:"bit_depth" yaml:"bit_depth" default:"16"`
	Codec           string `json:"codec" yaml:"codec" default:"audio-24khz-48kbitrate-mono-mp3"`
	FrameDuration   string `json:"frame_duration" yaml:"frame_duration" default:"20ms"`
	Timeout         int    `json:"timeout" yaml:"timeout" default:"30"`
	BaseURL         string `json:"base_url" yaml:"base_url"`
}

AzureConfig Azure TTS配置

func NewAzureConfig ¶

func NewAzureConfig(subscriptionKey, region string) AzureConfig

NewAzureConfig 创建 Azure TTS 配置

func (*AzureConfig) GetProvider ¶

func (c *AzureConfig) GetProvider() TTSProvider

GetProvider returns the TTS provider type

type AzureRequest ¶

// AzureRequest is the Azure TTS API request payload; it carries only the text,
// serialized under the JSON key "text".
type AzureRequest struct {
	Text string `json:"text"`
}

AzureRequest Azure TTS API 请求

type AzureService ¶

type AzureService struct {
	// contains filtered or unexported fields
}

func NewAzureService ¶

func NewAzureService(opt AzureConfig) *AzureService

NewAzureService 创建 Azure TTS 服务

func (*AzureService) CacheKey ¶

func (as *AzureService) CacheKey(text string) string

func (*AzureService) Close ¶

func (as *AzureService) Close() error

func (*AzureService) Format ¶

func (as *AzureService) Format() media.StreamFormat

func (*AzureService) Provider ¶

func (as *AzureService) Provider() TTSProvider

func (*AzureService) Synthesize ¶

func (as *AzureService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error

type BaiduTTSConfig ¶

// BaiduTTSConfig is the option type accepted by NewBaiduService. It carries
// the access token (Tok), the Baidu request parameters, and the audio
// parameters: channel count, sample rate, bit depth and frame duration.
//
// The short field names mirror the request parameter names used as JSON keys.
// NOTE(review): presumably lan=language, spd=speed, pit=pitch, vol=volume and
// aue=audio encoding — confirm against the Baidu TTS API documentation.
//
// Tok previously declared the `env` tag key twice (`env:"tok"` followed by
// `env:"BAIDU_ACCESS_TOKEN"`). reflect.StructTag returns only the first
// occurrence of a key, so the second entry was dead and is flagged by go vet's
// structtag check. Only the effective `env:"tok"` is kept, which preserves
// runtime behavior.
// NOTE(review): if BAIDU_ACCESS_TOKEN was the intended variable name, switch
// the tag deliberately rather than re-adding a duplicate key.
type BaiduTTSConfig struct {
	Tok           string `json:"tok" env:"tok"`
	Cuid          string `json:"cuid" env:"cuid"`
	Ctp           string `json:"ctp" env:"ctp" default:"1"`
	Lan           string `json:"lan" env:"lan" default:"zh"`
	Spd           string `json:"spd" env:"spd" default:"5"`
	Pit           string `json:"pit" env:"pit" default:"5"`
	Vol           string `json:"vol" env:"vol" default:"5"`
	Aue           string `json:"aue" env:"aue" default:"3"`
	Channels      int    `json:"channels" env:"channels" default:"1"`
	SampleRate    int    `json:"sampleRate" env:"sample_rate" default:"16000"`
	BitDepth      int    `json:"bitDepth" env:"bit_depth" default:"16"`
	FrameDuration string `json:"frameDuration" env:"frame_duration" default:"20ms"`
}

func NewBaiduTTSOption ¶

func NewBaiduTTSOption(token string) BaiduTTSConfig

func (*BaiduTTSConfig) GetProvider ¶

func (c *BaiduTTSConfig) GetProvider() TTSProvider

GetProvider returns the TTS provider type

func (*BaiduTTSConfig) String ¶

func (opt *BaiduTTSConfig) String() string

type BaiduTTSService ¶

type BaiduTTSService struct {
	// contains filtered or unexported fields
}

func NewBaiduService ¶

func NewBaiduService(opt BaiduTTSConfig) *BaiduTTSService

func (*BaiduTTSService) CacheKey ¶

func (bs *BaiduTTSService) CacheKey(text string) string

func (*BaiduTTSService) Close ¶

func (bs *BaiduTTSService) Close() error

func (*BaiduTTSService) DoubleURLEncode ¶

func (bs *BaiduTTSService) DoubleURLEncode(text string) string

func (*BaiduTTSService) Format ¶

func (bs *BaiduTTSService) Format() media.StreamFormat

func (*BaiduTTSService) Provider ¶

func (bs *BaiduTTSService) Provider() TTSProvider

func (*BaiduTTSService) Synthesize ¶

func (bs *BaiduTTSService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error

type CoquiResponse ¶

// CoquiResponse is a Coqui response body with a single string field decoded
// from the JSON key "audio".
// NOTE(review): the encoding of Audio (e.g. base64) is not visible here —
// confirm against CoquiService.Synthesize.
type CoquiResponse struct {
	Audio string `json:"audio"`
}

type CoquiService ¶

type CoquiService struct {
	// contains filtered or unexported fields
}

func NewCoquiService ¶

func NewCoquiService(opt CoquiTTSOption) *CoquiService

func (*CoquiService) CacheKey ¶

func (c *CoquiService) CacheKey(text string) string

func (*CoquiService) Close ¶

func (c *CoquiService) Close() error

func (*CoquiService) Format ¶

func (c *CoquiService) Format() media.StreamFormat

func (*CoquiService) Provider ¶

func (c *CoquiService) Provider() TTSProvider

func (*CoquiService) Synthesize ¶

func (c *CoquiService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error

type CoquiTTSOption ¶

// CoquiTTSOption is the option type accepted by NewCoquiService. It carries
// the server URL, the language and speaker selection, and the audio
// parameters: sample rate, channel count, bit depth and frame duration.
// Note that the JSON keys are camelCase while the YAML keys are snake_case.
type CoquiTTSOption struct {
	Url           string `json:"url" yaml:"url" env:"COQUI_URL"`
	Language      string `json:"language" yaml:"language" default:"en_US"`
	Speaker       string `json:"speaker" yaml:"speaker" default:"p226"`
	SampleRate    int    `json:"sampleRate" yaml:"sample_rate" default:"16000"`
	Channels      int    `json:"channels" yaml:"channels" default:"1"`
	BitDepth      int    `json:"bitDepth" yaml:"bit_depth" default:"16"`
	FrameDuration string `json:"frameDuration" yaml:"frame_duration" default:"20ms"`
}

func NewCoquiTTSOption ¶

func NewCoquiTTSOption(url string) CoquiTTSOption

func (*CoquiTTSOption) GetProvider ¶

func (c *CoquiTTSOption) GetProvider() TTSProvider

GetProvider returns the TTS provider type

func (*CoquiTTSOption) String ¶

func (opt *CoquiTTSOption) String() string

type DefaultSynthesisFactory ¶

type DefaultSynthesisFactory struct {
	// contains filtered or unexported fields
}

DefaultSynthesisFactory 默认TTS工厂实现

func NewSynthesisFactory ¶

func NewSynthesisFactory() *DefaultSynthesisFactory

NewSynthesisFactory 创建新的TTS工厂实例

func (*DefaultSynthesisFactory) CreateEngine ¶

func (f *DefaultSynthesisFactory) CreateEngine(config SynthesisConfig) (AudioSynthesisEngine, error)

CreateEngine 创建 AudioSynthesisEngine

func (*DefaultSynthesisFactory) GetSupportedProviders ¶

func (f *DefaultSynthesisFactory) GetSupportedProviders() []TTSProvider

GetSupportedProviders 获取支持的提供商列表

func (*DefaultSynthesisFactory) IsProviderSupported ¶

func (f *DefaultSynthesisFactory) IsProviderSupported(provider TTSProvider) bool

IsProviderSupported 检查提供商是否支持

func (*DefaultSynthesisFactory) RegisterCreator ¶

func (f *DefaultSynthesisFactory) RegisterCreator(provider TTSProvider, creator func(SynthesisConfig) (AudioSynthesisEngine, error))

RegisterCreator 注册创建函数

type ElevenLabsConfig ¶

// ElevenLabsConfig is the ElevenLabs TTS configuration accepted by
// NewElevenLabsService. It holds the API key, voice and model selection, the
// audio parameters, a timeout, and the voice-tuning values that also appear in
// ElevenLabsVoiceSettings.
// NOTE(review): Timeout has no unit in its declaration; presumably seconds —
// confirm.
type ElevenLabsConfig struct {
	APIKey        string `json:"api_key" yaml:"api_key" env:"ELEVENLABS_API_KEY"`
	VoiceID       string `json:"voice_id" yaml:"voice_id" default:"21m00Tcm4TlvDq8ikWAM"` // defaults to the Rachel voice
	ModelID       string `json:"model_id" yaml:"model_id" default:"eleven_monolingual_v1"`
	LanguageCode  string `json:"language_code" yaml:"language_code"` // language code, e.g. en, zh, ja
	SampleRate    int    `json:"sample_rate" yaml:"sample_rate" default:"44100"`
	Channels      int    `json:"channels" yaml:"channels" default:"1"`
	BitDepth      int    `json:"bit_depth" yaml:"bit_depth" default:"16"`
	Codec         string `json:"codec" yaml:"codec" default:"mp3"`
	FrameDuration string `json:"frame_duration" yaml:"frame_duration" default:"20ms"`
	Timeout       int    `json:"timeout" yaml:"timeout" default:"30"`
	// Voice settings
	Stability       float64 `json:"stability" yaml:"stability" default:"0.5"`                // 0.0-1.0
	SimilarityBoost float64 `json:"similarity_boost" yaml:"similarity_boost" default:"0.75"` // 0.0-1.0
	Style           float64 `json:"style" yaml:"style" default:"0.0"`                        // 0.0-1.0
	UseSpeakerBoost bool    `json:"use_speaker_boost" yaml:"use_speaker_boost" default:"true"`
}

ElevenLabsConfig ElevenLabs TTS配置

func NewElevenLabsConfig ¶

func NewElevenLabsConfig(apiKey, voiceID string) ElevenLabsConfig

NewElevenLabsConfig 创建 ElevenLabs TTS 配置

func (*ElevenLabsConfig) GetProvider ¶

func (c *ElevenLabsConfig) GetProvider() TTSProvider

GetProvider returns the TTS provider type

type ElevenLabsRequest ¶

// ElevenLabsRequest is the ElevenLabs API request payload. Text is always
// serialized; ModelID, VoiceSettings and LanguageCode are tagged omitempty, so
// they are left out of the JSON when empty or nil.
type ElevenLabsRequest struct {
	Text          string                   `json:"text"`
	ModelID       string                   `json:"model_id,omitempty"`
	VoiceSettings *ElevenLabsVoiceSettings `json:"voice_settings,omitempty"`
	LanguageCode  string                   `json:"language_code,omitempty"`
}

ElevenLabsRequest ElevenLabs API 请求

type ElevenLabsService ¶

type ElevenLabsService struct {
	// contains filtered or unexported fields
}

func NewElevenLabsService ¶

func NewElevenLabsService(opt ElevenLabsConfig) *ElevenLabsService

NewElevenLabsService 创建 ElevenLabs TTS 服务

func (*ElevenLabsService) CacheKey ¶

func (es *ElevenLabsService) CacheKey(text string) string

func (*ElevenLabsService) Close ¶

func (es *ElevenLabsService) Close() error

func (*ElevenLabsService) Format ¶

func (es *ElevenLabsService) Format() media.StreamFormat

func (*ElevenLabsService) Provider ¶

func (es *ElevenLabsService) Provider() TTSProvider

func (*ElevenLabsService) Synthesize ¶

func (es *ElevenLabsService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error

type ElevenLabsVoiceSettings ¶

// ElevenLabsVoiceSettings holds the voice settings sent as the
// "voice_settings" member of ElevenLabsRequest. No field is tagged omitempty,
// so zero values are serialized as-is.
type ElevenLabsVoiceSettings struct {
	Stability       float64 `json:"stability"`
	SimilarityBoost float64 `json:"similarity_boost"`
	Style           float64 `json:"style"`
	UseSpeakerBoost bool    `json:"use_speaker_boost"`
}

ElevenLabsVoiceSettings 音色设置

type FishAudioConfig ¶

// FishAudioConfig is the Fish Audio TTS configuration accepted by
// NewFishAudioService. It holds the API key, model selection, the audio
// parameters, generation parameters (temperature, top-p, chunk length,
// latency mode, normalization), the MP3 bitrate and a timeout.
// Note that MPEGBitrate is serialized under the key "mp3_bitrate".
// NOTE(review): Timeout has no unit in its declaration; presumably seconds —
// confirm.
type FishAudioConfig struct {
	APIKey      string  `json:"api_key" yaml:"api_key" env:"FISHAUDIO_API_KEY"`
	ReferenceID string  `json:"reference_id" yaml:"reference_id" default:""` // model ID
	Model       string  `json:"model" yaml:"model" default:"s1"`             // model version: s1, speech-1.6, speech-1.5
	SampleRate  int     `json:"sample_rate" yaml:"sample_rate" default:"44100"`
	Channels    int     `json:"channels" yaml:"channels" default:"1"`
	BitDepth    int     `json:"bit_depth" yaml:"bit_depth" default:"16"`
	Format      string  `json:"format" yaml:"format" default:"mp3"` // wav, pcm, mp3, opus
	Temperature float64 `json:"temperature" yaml:"temperature" default:"0.7"`
	TopP        float64 `json:"top_p" yaml:"top_p" default:"0.7"`
	Latency     string  `json:"latency" yaml:"latency" default:"normal"` // low, normal, balanced
	ChunkLength int     `json:"chunk_length" yaml:"chunk_length" default:"300"`
	Normalize   bool    `json:"normalize" yaml:"normalize" default:"true"`
	MPEGBitrate int     `json:"mp3_bitrate" yaml:"mp3_bitrate" default:"128"` // 64, 128, 192
	Timeout     int     `json:"timeout" yaml:"timeout" default:"30"`
}

FishAudioConfig Fish Audio TTS 配置

func NewFishAudioConfig ¶

func NewFishAudioConfig(apiKey, referenceID string) FishAudioConfig

NewFishAudioConfig 创建 Fish Audio TTS 配置

func (*FishAudioConfig) GetProvider ¶

func (c *FishAudioConfig) GetProvider() TTSProvider

GetProvider returns the TTS provider type

type FishAudioListModelsResponse ¶

// FishAudioListModelsResponse is the model-list response returned by the Fish
// Audio API: a total count under "total" and the entries under "items".
type FishAudioListModelsResponse struct {
	Total int                    `json:"total"`
	Items []FishAudioVoiceOption `json:"items"`
}

FishAudioListModelsResponse Fish Audio API 返回的模型列表响应

type FishAudioRequest ¶

// FishAudioRequest is the Fish Audio TTS request payload. Text and Model are
// always serialized; every other field is tagged omitempty.
// SampleRate is a pointer, so a nil value is omitted while an explicit value
// is sent.
// NOTE(review): when encoded with encoding/json, omitempty drops zero values,
// so Normalize=false, ConditionOnPreviousChunks=false and numeric zeros (e.g.
// Temperature=0) are never sent. Confirm that the API's server-side defaults
// make this safe; FishAudioConfig lists Normalize with default "true".
type FishAudioRequest struct {
	Text                      string  `json:"text"`
	Model                     string  `json:"model"`
	ReferenceID               string  `json:"reference_id,omitempty"`
	Temperature               float64 `json:"temperature,omitempty"`
	TopP                      float64 `json:"top_p,omitempty"`
	Format                    string  `json:"format,omitempty"`
	SampleRate                *int    `json:"sample_rate,omitempty"`
	ChunkLength               int     `json:"chunk_length,omitempty"`
	Normalize                 bool    `json:"normalize,omitempty"`
	Latency                   string  `json:"latency,omitempty"`
	MaxNewTokens              int     `json:"max_new_tokens,omitempty"`
	RepetitionPenalty         float64 `json:"repetition_penalty,omitempty"`
	MinChunkLength            int     `json:"min_chunk_length,omitempty"`
	ConditionOnPreviousChunks bool    `json:"condition_on_previous_chunks,omitempty"`
	EarlyStopThreshold        float64 `json:"early_stop_threshold,omitempty"`
}

FishAudioRequest Fish Audio TTS 请求

type FishAudioService ¶

type FishAudioService struct {
	// contains filtered or unexported fields
}

func NewFishAudioService ¶

func NewFishAudioService(opt FishAudioConfig) *FishAudioService

NewFishAudioService 创建 Fish Audio TTS 服务

func (*FishAudioService) CacheKey ¶

func (fa *FishAudioService) CacheKey(text string) string

func (*FishAudioService) Close ¶

func (fa *FishAudioService) Close() error

func (*FishAudioService) Format ¶

func (fa *FishAudioService) Format() media.StreamFormat

func (*FishAudioService) Provider ¶

func (fa *FishAudioService) Provider() TTSProvider

func (*FishAudioService) Synthesize ¶

func (fa *FishAudioService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error

type FishAudioVoiceOption ¶

type FishAudioVoiceOption struct {
	ID          string   `json:"_id"`
	Title       string   `json:"title"`
	Description string   `json:"description"`
	Type        string   `json:"type"`
	State       string   `json:"state"`
	CoverImage  string   `json:"cover_image"`
	Languages   []string `json:"languages"`
	Author      struct {
		ID       string `json:"_id"`
		Nickname string `json:"nickname"`
		Avatar   string `json:"avatar"`
	} `json:"author"`
}

FishAudioVoiceOption Fish Audio 音色选项

func GetFishAudioVoices ¶

func GetFishAudioVoices(apiKey string) ([]FishAudioVoiceOption, error)

GetFishAudioVoices 从 Fish Audio API 获取可用的音色列表。参数 apiKey：Fish Audio API Key。返回值：音色列表和错误信息。

type FishSpeechConfig ¶

type FishSpeechConfig struct {
	APIKey        string `json:"api_key" yaml:"api_key" env:"FISHSPEECH_API_KEY"`
	ReferenceID   string `json:"reference_id" yaml:"reference_id" default:"default"` // 模型ID
	SampleRate    int    `json:"sample_rate" yaml:"sample_rate" default:"24000"`
	Channels      int    `json:"channels" yaml:"channels" default:"1"`
	BitDepth      int    `json:"bit_depth" yaml:"bit_depth" default:"16"`
	Codec         string `json:"codec" yaml:"codec" default:"wav"`
	FrameDuration string `json:"frame_duration" yaml:"frame_duration" default:"20ms"`
	Timeout       int    `json:"timeout" yaml:"timeout" default:"30"`
	Latency       string `json:"latency" yaml:"latency" default:"normal"` // normal, balanced
	Version       string `json:"version" yaml:"version" default:"s1"`
}

FishSpeechConfig FishSpeech TTS配置

func NewFishSpeechConfig ¶

func NewFishSpeechConfig(apiKey, referenceID string) FishSpeechConfig

NewFishSpeechConfig 创建 FishSpeech TTS 配置

func (*FishSpeechConfig) GetProvider ¶

func (c *FishSpeechConfig) GetProvider() TTSProvider

GetProvider returns the TTS provider type

type FishSpeechListModelsRequest ¶

type FishSpeechListModelsRequest struct {
	ModelType       string `json:"modelType,omitempty"`       // 可选，模型类型："public" | "personal" | "all"
	IncludePersonal bool   `json:"includePersonal,omitempty"` // 可选，是否包含个人模型
	Page            int    `json:"page,omitempty"`            // 可选，页码，从1开始
	PageSize        int    `json:"pageSize,omitempty"`        // 可选，每页条数
}

FishSpeechListModelsRequest FishSpeech 列表模型请求

type FishSpeechListModelsResponse ¶

type FishSpeechListModelsResponse struct {
	Total      int                     `json:"total"`
	Page       int                     `json:"page"`
	PageSize   int                     `json:"pageSize"`
	TotalPages int                     `json:"totalPages"`
	Items      []FishSpeechVoiceOption `json:"items"`
}

FishSpeechListModelsResponse FishSpeech API 返回的模型列表响应

type FishSpeechService ¶

type FishSpeechService struct {
	// contains filtered or unexported fields
}

func NewFishSpeechService ¶

func NewFishSpeechService(opt FishSpeechConfig) *FishSpeechService

NewFishSpeechService 创建 FishSpeech TTS 服务

func (*FishSpeechService) CacheKey ¶

func (fs *FishSpeechService) CacheKey(text string) string

func (*FishSpeechService) Close ¶

func (fs *FishSpeechService) Close() error

func (*FishSpeechService) Format ¶

func (fs *FishSpeechService) Format() media.StreamFormat

func (*FishSpeechService) Provider ¶

func (fs *FishSpeechService) Provider() TTSProvider

func (*FishSpeechService) Synthesize ¶

func (fs *FishSpeechService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error

type FishSpeechV2Request ¶

type FishSpeechV2Request struct {
	Text        string `json:"text"`
	ReferenceID string `json:"reference_id,omitempty"`
	Format      string `json:"format,omitempty"`
	Latency     string `json:"latency,omitempty"`
}

FishSpeechV2Request WebSocket v2 请求

type FishSpeechV2Response ¶

type FishSpeechV2Response struct {
	Type    string `json:"type"`              // "audio" 或 "error"
	Data    string `json:"data,omitempty"`    // base64 编码的音频数据
	Error   string `json:"error,omitempty"`   // 错误信息
	Message string `json:"message,omitempty"` // 消息
}

FishSpeechV2Response WebSocket v2 响应

type FishSpeechVoiceOption ¶

type FishSpeechVoiceOption struct {
	ModelID     string `json:"modelId"`     // 模型 ID
	Title       string `json:"title"`       // 模型名称
	Description string `json:"description"` // 模型描述
	IsPersonal  bool   `json:"isPersonal"`  // 是否为个人模型
}

FishSpeechVoiceOption FishSpeech 音色选项

func GetFishSpeechVoices ¶

func GetFishSpeechVoices(apiKey string) ([]FishSpeechVoiceOption, error)

GetFishSpeechVoices 从 FishSpeech API 获取可用的音色列表。参数 apiKey：FishSpeech API Key。返回值：音色列表和错误信息。

type GoogleService ¶

type GoogleService struct {
	// contains filtered or unexported fields
}

func NewGoogleService ¶

func NewGoogleService(opt GoogleTTSOption) *GoogleService

func (*GoogleService) CacheKey ¶

func (gs *GoogleService) CacheKey(text string) string

func (*GoogleService) Close ¶

func (gs *GoogleService) Close() error

func (*GoogleService) Format ¶

func (gs *GoogleService) Format() media.StreamFormat

func (*GoogleService) Provider ¶

func (gs *GoogleService) Provider() TTSProvider

func (*GoogleService) Synthesize ¶

func (gs *GoogleService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error

type GoogleTTSOption ¶

type GoogleTTSOption struct {
	LanguageCode  string                         `json:"languageCode" yaml:"language_code"`
	SsmlGender    texttospeechpb.SsmlVoiceGender `json:"ssmlGender" yaml:"ssml_gender"`
	AudioEncoding texttospeechpb.AudioEncoding   `json:"audioEncoding" yaml:"audio_encoding" default:"LINEAR16"`
	SampleRate    int                            `json:"sampleRate" yaml:"sample_rate" default:"16000"`
	Channels      int                            `json:"channels" yaml:"channels" default:"1"`
	BitDepth      int                            `json:"bitDepth" yaml:"bit_depth" default:"16"`
	FrameDuration string                         `json:"frameDuration" yaml:"frame_duration" default:"20ms"`
}

func NewGoogleTTSOption ¶

func NewGoogleTTSOption(languageCode string) GoogleTTSOption

func (*GoogleTTSOption) GetProvider ¶

func (c *GoogleTTSOption) GetProvider() TTSProvider

GetProvider returns the TTS provider type

func (*GoogleTTSOption) String ¶

func (opt *GoogleTTSOption) String() string

type LocalGoSpeechConfig ¶

type LocalGoSpeechConfig struct {
	Provider    LocalGoSpeechProvider `json:"provider"`    // TTS提供商
	ModelPath   string                `json:"modelPath"`   // 模型文件路径（可选）
	Language    string                `json:"language"`    // 语言代码
	Speaker     string                `json:"speaker"`     // 发音人
	SampleRate  int                   `json:"sampleRate"`  // 采样率
	Channels    int                   `json:"channels"`    // 声道数
	BitDepth    int                   `json:"bitDepth"`    // 位深度
	Speed       float32               `json:"speed"`       // 语速
	Pitch       float32               `json:"pitch"`       // 音调
	Volume      float32               `json:"volume"`      // 音量
	EnableCache bool                  `json:"enableCache"` // 是否启用缓存
	CacheExpiry time.Duration         `json:"cacheExpiry"` // 缓存过期时间
	Command     string                `json:"command"`     // 自定义命令
	OutputDir   string                `json:"outputDir"`   // 输出目录
}

LocalGoSpeechConfig 本地TTS配置

func NewLocalGoSpeechConfig ¶

func NewLocalGoSpeechConfig(provider LocalGoSpeechProvider, modelPath string) *LocalGoSpeechConfig

NewLocalGoSpeechConfig 创建默认本地TTS配置

func (*LocalGoSpeechConfig) GetProvider ¶

func (c *LocalGoSpeechConfig) GetProvider() TTSProvider

GetProvider returns the TTS provider type

type LocalGoSpeechProvider ¶

type LocalGoSpeechProvider string

LocalGoSpeechProvider 本地TTS提供商类型

const (
	LocalGoSpeechProviderEspeak   LocalGoSpeechProvider = "espeak"
	LocalGoSpeechProviderSay      LocalGoSpeechProvider = "say"
	LocalGoSpeechProviderFestival LocalGoSpeechProvider = "festival"
	LocalGoSpeechProviderPico     LocalGoSpeechProvider = "pico"
)

type LocalGoSpeechService ¶

type LocalGoSpeechService struct {
	// contains filtered or unexported fields
}

LocalGoSpeechService 本地TTS服务

func NewLocalGoSpeechService ¶

func NewLocalGoSpeechService(config *LocalGoSpeechConfig) (*LocalGoSpeechService, error)

NewLocalGoSpeechService 创建本地TTS服务

func (*LocalGoSpeechService) CacheKey ¶

func (s *LocalGoSpeechService) CacheKey(text string) string

CacheKey 生成缓存键

func (*LocalGoSpeechService) Close ¶

func (s *LocalGoSpeechService) Close() error

Close 关闭服务

func (*LocalGoSpeechService) Format ¶

func (s *LocalGoSpeechService) Format() media.StreamFormat

Format 返回音频格式

func (*LocalGoSpeechService) GetConfig ¶

func (s *LocalGoSpeechService) GetConfig() *LocalGoSpeechConfig

GetConfig 获取配置

func (*LocalGoSpeechService) GetSupportedLanguages ¶

func (s *LocalGoSpeechService) GetSupportedLanguages() []string

GetSupportedLanguages 获取支持的语言列表

func (*LocalGoSpeechService) GetSupportedSpeakers ¶

func (s *LocalGoSpeechService) GetSupportedSpeakers() []string

GetSupportedSpeakers 获取支持的发音人列表

func (*LocalGoSpeechService) IsReady ¶

func (s *LocalGoSpeechService) IsReady() bool

IsReady 检查服务是否就绪

func (*LocalGoSpeechService) Provider ¶

func (s *LocalGoSpeechService) Provider() TTSProvider

Provider 返回提供商

func (*LocalGoSpeechService) Synthesize ¶

func (s *LocalGoSpeechService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error

Synthesize 合成语音

func (*LocalGoSpeechService) UpdateConfig ¶

func (s *LocalGoSpeechService) UpdateConfig(config *LocalGoSpeechConfig) error

UpdateConfig 更新配置

type LocalService ¶

type LocalService struct {
	// contains filtered or unexported fields
}

func NewLocalService ¶

func NewLocalService(opt LocalTTSConfig) *LocalService

NewLocalService 创建本地TTS服务

func (*LocalService) CacheKey ¶

func (ls *LocalService) CacheKey(text string) string

func (*LocalService) Close ¶

func (ls *LocalService) Close() error

func (*LocalService) Format ¶

func (ls *LocalService) Format() media.StreamFormat

func (*LocalService) Provider ¶

func (ls *LocalService) Provider() TTSProvider

func (*LocalService) Synthesize ¶

func (ls *LocalService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error

type LocalTTSConfig ¶

type LocalTTSConfig struct {
	Command       string `json:"command" yaml:"command" default:"say"`           // TTS 命令（如 say, festival, espeak）
	Voice         string `json:"voice" yaml:"voice" default:""`                  // 音色（可选）
	SampleRate    int    `json:"sample_rate" yaml:"sample_rate" default:"16000"` // 采样率
	Channels      int    `json:"channels" yaml:"channels" default:"1"`           // 声道数
	BitDepth      int    `json:"bit_depth" yaml:"bit_depth" default:"16"`        // 位深度
	Codec         string `json:"codec" yaml:"codec" default:"wav"`               // 音频编解码器
	FrameDuration string `json:"frame_duration" yaml:"frame_duration" default:"20ms"`
	OutputDir     string `json:"output_dir" yaml:"output_dir" default:"/tmp"` // 输出目录
}

LocalTTSConfig 本地TTS配置

func NewLocalTTSConfig ¶

func NewLocalTTSConfig(command string) LocalTTSConfig

NewLocalTTSConfig 创建本地TTS配置

func (*LocalTTSConfig) GetProvider ¶

func (c *LocalTTSConfig) GetProvider() TTSProvider

GetProvider returns the TTS provider type

type MinimaxAudioSetting ¶

type MinimaxAudioSetting struct {
	SampleRate int    `json:"sample_rate"`
	Bitrate    *int   `json:"bitrate,omitempty"`
	Format     string `json:"format"`
	Channel    int    `json:"channel"`
}

type MinimaxConnectionResponse ¶

type MinimaxConnectionResponse struct {
	SessionID string `json:"session_id"`
	Event     string `json:"event"`
	TraceID   string `json:"trace_id"`
	BaseResp  struct {
		StatusCode int    `json:"status_code"`
		StatusMsg  string `json:"status_msg"`
	} `json:"base_resp"`
}

type MinimaxOption ¶

type MinimaxOption struct {
	Model         string  `json:"model" yaml:"model" default:"speech-2.5-turbo-preview"`
	APIKey        string  `json:"apiKey" yaml:"api_key" env:"MINIMAX_API_KEY"`
	VoiceID       string  `json:"voiceId" yaml:"voice_id" default:"male-qn-qingse"`
	SpeedRatio    float64 `json:"speedRatio" yaml:"speed_ratio" default:"1.0"`
	Volume        float64 `json:"volume" yaml:"volume" default:"1.0"`
	Pitch         float64 `json:"pitch" yaml:"pitch" default:"0.0"`
	Emotion       string  `json:"emotion" yaml:"emotion" default:"neutral"`
	LanguageBoost string  `json:"languageBoost" yaml:"language_boost" default:"auto"`
	TrainingTimes int     `json:"trainingTimes" yaml:"training_times" default:"1"`

	SampleRate    int    `json:"sampleRate" yaml:"sample_rate" default:"8000"`
	Bitrate       int    `json:"bitrate" yaml:"bitrate" default:"16"`
	Format        string `json:"format" yaml:"format" default:"pcm"`
	Channels      int    `json:"channels" yaml:"channels" default:"1"`
	FrameDuration string `json:"frameDuration" yaml:"frame_duration" default:"20ms"`
}

func NewMinimaxOption ¶

func NewMinimaxOption(apiKey string) MinimaxOption

func (*MinimaxOption) GetProvider ¶

func (c *MinimaxOption) GetProvider() TTSProvider

GetProvider returns the TTS provider type

func (*MinimaxOption) String ¶

func (opt *MinimaxOption) String() string

type MinimaxPronunciationDict ¶

type MinimaxPronunciationDict struct {
}

type MinimaxService ¶

type MinimaxService struct {
	ConnSessionID string
	TraceID       string
	// contains filtered or unexported fields
}

func NewMinimaxService ¶

func NewMinimaxService(opt MinimaxOption) *MinimaxService

func (*MinimaxService) CacheKey ¶

func (ms *MinimaxService) CacheKey(text string) string

func (*MinimaxService) Close ¶

func (ms *MinimaxService) Close() error

func (*MinimaxService) Format ¶

func (ms *MinimaxService) Format() media.StreamFormat

func (*MinimaxService) GetConnSessionID ¶

func (ms *MinimaxService) GetConnSessionID() string

func (*MinimaxService) GetTraceID ¶

func (ms *MinimaxService) GetTraceID() string

func (*MinimaxService) Provider ¶

func (ms *MinimaxService) Provider() TTSProvider

func (*MinimaxService) Synthesize ¶

func (ms *MinimaxService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error

type MinimaxTaskContinueResponse ¶

type MinimaxTaskContinueResponse struct {
	Data struct {
		Audio string `json:"audio"`
	} `json:"data"`
	SessionID string `json:"session_id"`
	Event     string `json:"event"`
	IsFinal   bool   `json:"is_final"`
	TraceID   string `json:"trace_id"`
	BaseResp  struct {
		StatusCode int    `json:"status_code"`
		StatusMsg  string `json:"status_msg"`
	} `json:"base_resp"`
}

type MinimaxTaskStartRequest ¶

type MinimaxTaskStartRequest struct {
	Event             string                    `json:"event"`
	Model             string                    `json:"model"`
	VoiceSetting      MinimaxVoiceSetting       `json:"voice_setting"`
	AudioSetting      MinimaxAudioSetting       `json:"audio_setting"`
	PronunciationDict *MinimaxPronunciationDict `json:"pronunciation_dict,omitempty"`
	LanguageBoost     string                    `json:"language_boost,omitempty"`
}

type MinimaxTaskStartResponse ¶

type MinimaxTaskStartResponse struct {
	Event    string `json:"event"`
	BaseResp struct {
		StatusCode int    `json:"status_code"`
		StatusMsg  string `json:"status_msg"`
	} `json:"base_resp"`
}

type MinimaxTimbreWeight ¶

type MinimaxTimbreWeight struct {
	VoiceID string `json:"voice_id"`
	Weight  int    `json:"weight"`
}

type MinimaxVoiceSetting ¶

type MinimaxVoiceSetting struct {
	VoiceID       string                `json:"voice_id,omitempty"`
	Weight        int                   `json:"weight,omitempty"`
	TimbreWeights []MinimaxTimbreWeight `json:"timbre_weights,omitempty"`
	Speed         float64               `json:"speed"`
	Volume        float64               `json:"vol"`
	Pitch         float64               `json:"pitch"`
	Emotion       string                `json:"emotion"`
	ToneList      []string              `json:"tonelist,omitempty"`
}

type OpenAIConfig ¶

type OpenAIConfig struct {
	APIKey        string  `json:"api_key" yaml:"api_key" env:"OPENAI_API_KEY"`
	Model         string  `json:"model" yaml:"model" default:"tts-1"`
	Voice         string  `json:"voice" yaml:"voice" default:"alloy"`
	Speed         float64 `json:"speed" yaml:"speed" default:"1.0"`
	SampleRate    int     `json:"sample_rate" yaml:"sample_rate" default:"24000"`
	Channels      int     `json:"channels" yaml:"channels" default:"1"`
	BitDepth      int     `json:"bit_depth" yaml:"bit_depth" default:"16"`
	Codec         string  `json:"codec" yaml:"codec" default:"mp3"`
	FrameDuration string  `json:"frame_duration" yaml:"frame_duration" default:"20ms"`
	Timeout       int     `json:"timeout" yaml:"timeout" default:"30"`
	BaseURL       string  `json:"base_url" yaml:"base_url" default:"https://api.openai.com"`
}

OpenAIConfig OpenAI TTS配置

func NewOpenAIConfig ¶

func NewOpenAIConfig(apiKey string) OpenAIConfig

NewOpenAIConfig 创建 OpenAI TTS 配置

func (*OpenAIConfig) GetProvider ¶

func (c *OpenAIConfig) GetProvider() TTSProvider

GetProvider returns the TTS provider type

type OpenAIRequest ¶

type OpenAIRequest struct {
	Model          string  `json:"model"`
	Input          string  `json:"input"`
	Voice          string  `json:"voice"`
	ResponseFormat string  `json:"response_format,omitempty"`
	Speed          float64 `json:"speed,omitempty"`
}

OpenAIRequest OpenAI API 请求

type OpenAIService ¶

type OpenAIService struct {
	// contains filtered or unexported fields
}

func NewOpenAIService ¶

func NewOpenAIService(opt OpenAIConfig) *OpenAIService

NewOpenAIService 创建 OpenAI TTS 服务

func (*OpenAIService) CacheKey ¶

func (os *OpenAIService) CacheKey(text string) string

func (*OpenAIService) Close ¶

func (os *OpenAIService) Close() error

func (*OpenAIService) Format ¶

func (os *OpenAIService) Format() media.StreamFormat

func (*OpenAIService) Provider ¶

func (os *OpenAIService) Provider() TTSProvider

func (*OpenAIService) Synthesize ¶

func (os *OpenAIService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error

type PlayRecord ¶

type PlayRecord struct {
	// contains filtered or unexported fields
}

type QCloudService ¶

type QCloudService struct {
	// contains filtered or unexported fields
}

func NewQCloudService ¶

func NewQCloudService(opt QCloudTTSConfig) *QCloudService

func (*QCloudService) CacheKey ¶

func (qs *QCloudService) CacheKey(text string) string

func (*QCloudService) Close ¶

func (qs *QCloudService) Close() error

func (*QCloudService) Format ¶

func (qs *QCloudService) Format() media.StreamFormat

func (*QCloudService) Provider ¶

func (qs *QCloudService) Provider() TTSProvider

func (*QCloudService) Synthesize ¶

func (qs *QCloudService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error

type QCloudTTSConfig ¶

type QCloudTTSConfig struct {
	AppID         int64  `json:"appId" yaml:"app_id" env:"QCLOUD_APP_ID"`
	SecretID      string `json:"secretId" yaml:"secret_id" env:"QCLOUD_SECRET_ID"`
	SecretKey     string `json:"secret" yaml:"secret" env:"QCLOUD_SECRET"`
	VoiceType     int64  `json:"voiceType" yaml:"voice_type" default:"1005"`
	ModelType     int64  `json:"modelType" yaml:"model_type" default:"1"`
	Language      string `json:"language" yaml:"language"` // 语言代码，如 zh-CN, en-US（腾讯云通过音色类型区分语言，此字段用于配置和缓存）
	SampleRate    int    `json:"sampleRate" yaml:"sample_rate" default:"8000"`
	Channels      int    `json:"channels" yaml:"channels" default:"1"`
	BitDepth      int    `json:"bitDepth" yaml:"bit_depth" default:"16"`
	Codec         string `json:"codec" yaml:"codec" default:"pcm"`
	FrameDuration string `json:"frameDuration" yaml:"frame_duration" default:"20ms"`
	// Speed is Tencent TTS speed level (typically -2~6, 0 means default).
	Speed int64 `json:"speed" yaml:"speed" default:"0"`
}

QCloudTTSConfig is the Tencent Cloud TTS configuration.

func NewQcloudTTSConfig ¶

func NewQcloudTTSConfig(appId string, secretId string, secretKey string, voiceType int64, codec string, sample int) QCloudTTSConfig

func (*QCloudTTSConfig) GetProvider ¶

func (c *QCloudTTSConfig) GetProvider() TTSProvider

GetProvider returns the TTS provider type

func (*QCloudTTSConfig) ToString ¶

func (opt *QCloudTTSConfig) ToString() string

type QiniuService ¶

type QiniuService struct {
	// contains filtered or unexported fields
}

func NewQiniuService ¶

func NewQiniuService(opt QiniuTTSConfig) *QiniuService

NewQiniuService 创建七牛云TTS服务

func (*QiniuService) CacheKey ¶

func (qs *QiniuService) CacheKey(text string) string

func (*QiniuService) Close ¶

func (qs *QiniuService) Close() error

func (*QiniuService) Format ¶

func (qs *QiniuService) Format() media.StreamFormat

func (*QiniuService) Provider ¶

func (qs *QiniuService) Provider() TTSProvider

func (*QiniuService) Synthesize ¶

func (qs *QiniuService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error

type QiniuTTSConfig ¶

type QiniuTTSConfig struct {
	APIKey        string `json:"api_key" yaml:"api_key" env:"QINIU_TTS_API_KEY"`
	BaseURL       string `json:"base_url" yaml:"base_url" env:"QINIU_TTS_BASE_URL"`
	VoiceType     string `json:"voice_type" yaml:"voice_type" default:"female_cn_001"`
	SampleRate    int    `json:"sample_rate" yaml:"sample_rate" default:"16000"`
	Channels      int    `json:"channels" yaml:"channels" default:"1"`
	BitDepth      int    `json:"bit_depth" yaml:"bit_depth" default:"16"`
	Codec         string `json:"codec" yaml:"codec" default:"pcm"`
	FrameDuration string `json:"frame_duration" yaml:"frame_duration" default:"20ms"`
	Timeout       int    `json:"timeout" yaml:"timeout" default:"30"`
	Retries       int    `json:"retries" yaml:"retries" default:"0"`
}

QiniuTTSConfig 七牛云TTS配置

func NewQiniuTTSConfig ¶

func NewQiniuTTSConfig(apiKey, baseURL string) QiniuTTSConfig

NewQiniuTTSConfig 创建七牛云TTS配置

func (*QiniuTTSConfig) GetProvider ¶

func (c *QiniuTTSConfig) GetProvider() TTSProvider

GetProvider returns the TTS provider type

type QiniuTTSRequest ¶

type QiniuTTSRequest struct {
	Audio   TTSAudio       `json:"audio"`
	Request TTSRequestData `json:"request"`
}

QiniuTTSRequest 七牛云TTS请求结构

type QiniuTTSResponse ¶

type QiniuTTSResponse struct {
	Reqid     string       `json:"reqid"`
	Operation string       `json:"operation"`
	Sequence  int          `json:"sequence"`
	Data      string       `json:"data"`
	Addition  *TTSAddition `json:"addition,omitempty"`
}

QiniuTTSResponse 七牛云TTS响应结构

type SentenceTimestamp ¶

type SentenceTimestamp struct {
	Words []Word `json:"words"`
}

type SynthesisBuffer ¶

type SynthesisBuffer struct {
	Data      []byte
	Timestamp SentenceTimestamp
}

func (*SynthesisBuffer) OnMessage ¶

func (s *SynthesisBuffer) OnMessage(data []byte)

func (*SynthesisBuffer) OnTimestamp ¶

func (s *SynthesisBuffer) OnTimestamp(timestamp SentenceTimestamp)

type SynthesisConfig ¶

type SynthesisConfig interface {
	GetProvider() TTSProvider
}

SynthesisConfig 统一的TTS配置接口

type SynthesisFactory ¶

type SynthesisFactory interface {
	// CreateEngine 根据配置创建 AudioSynthesisEngine
	CreateEngine(config SynthesisConfig) (AudioSynthesisEngine, error)
	// GetSupportedProviders 获取支持的提供商列表
	GetSupportedProviders() []TTSProvider
	// IsProviderSupported 检查提供商是否支持
	IsProviderSupported(provider TTSProvider) bool
	// RegisterCreator 注册创建函数
	RegisterCreator(provider TTSProvider, creator func(SynthesisConfig) (AudioSynthesisEngine, error))
}

SynthesisFactory TTS工厂接口

func GetGlobalSynthesisFactory ¶

func GetGlobalSynthesisFactory() SynthesisFactory

GetGlobalSynthesisFactory 获取全局TTS工厂实例

type TTSAddition ¶

type TTSAddition struct {
	Duration string `json:"duration"`
}

TTSAddition TTS附加信息

type TTSAudio ¶

type TTSAudio struct {
	VoiceType  string  `json:"voice_type"`
	Encoding   string  `json:"encoding"`
	SpeedRatio float64 `json:"speed_ratio,omitempty"`
}

TTSAudio TTS音频配置

type TTSCredentialConfig ¶

type TTSCredentialConfig map[string]interface{}

TTSCredentialConfig TTS凭证配置结构（灵活的键值对配置）

type TTSProvider ¶

type TTSProvider string

TTSProvider TTS服务提供商类型

const (
	// ProviderQiniu 七牛云TTS
	ProviderQiniu TTSProvider = "qiniu"
	// ProviderXunfei 讯飞TTS
	ProviderXunfei TTSProvider = "xunfei"
	// ProviderAliyun 阿里云TTS
	ProviderAliyun TTSProvider = "aliyun"
	// ProviderTencent 腾讯云TTS
	ProviderTencent TTSProvider = "qcloud"
	// ProviderBaidu 百度TTS
	ProviderBaidu TTSProvider = "baidu"
	// ProviderAzure 微软Azure TTS
	ProviderAzure TTSProvider = "azure"
	// ProviderGoogle Google Cloud TTS
	ProviderGoogle TTSProvider = "google"
	// ProviderAWS Amazon Polly TTS
	ProviderAWS TTSProvider = "aws"
	// ProviderOpenAI OpenAI TTS
	ProviderOpenAI TTSProvider = "openai"
	// ProviderElevenLabs ElevenLabs TTS
	ProviderElevenLabs TTSProvider = "elevenlabs"
	// ProviderLocal 本地TTS
	ProviderLocal TTSProvider = "local"
	// ProviderLocalGoSpeech 本地go-speech TTS
	ProviderLocalGoSpeech TTSProvider = "local_gospeech"
	// ProviderFishSpeech FishSpeech TTS
	ProviderFishSpeech TTSProvider = "fishspeech"
	// ProviderFishAudio Fish Audio TTS
	ProviderFishAudio TTSProvider = "fishaudio"
	// ProviderCoqui Coqui TTS
	ProviderCoqui TTSProvider = "coqui"
	// ProviderVolcengine 火山引擎标准TTS
	ProviderVolcengine TTSProvider = "volcengine"
	// ProviderMinimax Minimax TTS
	ProviderMinimax TTSProvider = "minimax"
)

func (TTSProvider) ToString ¶

func (tp TTSProvider) ToString() string

type TTSRequestData ¶

type TTSRequestData struct {
	Text string `json:"text"`
}

TTSRequestData TTS请求数据

type VolcAddition ¶

type VolcAddition struct {
	Frontend string `json:"frontend"`
}

VolcAddition 火山引擎附加信息

type VolcengineService ¶

type VolcengineService struct {
	// contains filtered or unexported fields
}

VolcengineService 火山引擎标准TTS服务

func NewVolcengineService ¶

func NewVolcengineService(opt VolcengineTTSOption) *VolcengineService

NewVolcengineService 创建火山引擎TTS服务

func (*VolcengineService) CacheKey ¶

func (v *VolcengineService) CacheKey(text string) string

func (*VolcengineService) Close ¶

func (v *VolcengineService) Close() error

func (*VolcengineService) Format ¶

func (v *VolcengineService) Format() media.StreamFormat

func (*VolcengineService) Provider ¶

func (v *VolcengineService) Provider() TTSProvider

func (*VolcengineService) Synthesize ¶

func (v *VolcengineService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error

type VolcengineTTSOption ¶

type VolcengineTTSOption struct {
	AppID         string  `json:"appID"`         // 应用ID
	AccessToken   string  `json:"accessToken"`   // 访问令牌
	Cluster       string  `json:"cluster"`       // 集群名称，如 volcano_tts
	VoiceType     string  `json:"voiceType"`     // 音色类型，如 BV700_streaming
	Rate          int     `json:"rate"`          // 采样率，默认 8000
	Encoding      string  `json:"encoding"`      // 编码格式，默认 pcm
	SpeedRatio    float32 `json:"speedRatio"`    // 语速比例，默认 1.0
	VolumeRatio   float32 `json:"volumeRatio"`   // 音量比例，默认 1.0
	PitchRatio    float32 `json:"pitchRatio"`    // 音调比例，默认 1.0
	Channels      int     `json:"channels"`      // 声道数，默认 1
	BitDepth      int     `json:"bitDepth"`      // 位深度，默认 16
	FrameDuration string  `json:"frameDuration"` // 帧时长，默认 20ms
	TextType      string  `json:"textType"`      // 文本类型，plain 或 ssml
	Ssml          bool    `json:"ssml"`          // 是否使用 SSML
}

VolcengineTTSOption 火山引擎标准TTS配置。支持的常用音色类型（VoiceType）：BV700_streaming（默认音色）；BV700_V2_streaming（V2版本）；BV213_streaming（广西老表，男声）；BV025_streaming（甜美台妹，女声）。更多音色类型请参考火山引擎官方文档。

func NewVolcengineTTSOption ¶

func NewVolcengineTTSOption(appID, accessToken, cluster string) VolcengineTTSOption

NewVolcengineTTSOption 创建火山引擎TTS配置

func (*VolcengineTTSOption) GetProvider ¶

func (c *VolcengineTTSOption) GetProvider() TTSProvider

GetProvider returns the TTS provider type

type VolcengineTTSServResponse ¶

type VolcengineTTSServResponse struct {
	ReqID     string       `json:"reqid"`
	Code      int          `json:"code"`
	Message   string       `json:"message"`
	Operation string       `json:"operation"`
	Sequence  int          `json:"sequence"`
	Data      string       `json:"data"`
	Addition  VolcAddition `json:"addition"`
}

VolcengineTTSServResponse 火山引擎TTS响应结构

type WSAudio ¶

type WSAudio struct {
	Encoding   string `json:"encoding"`
	SampleRate int    `json:"sample_rate"`
}

WSAudio WebSocket音频参数

type WSHeader ¶

type WSHeader struct {
	AppID  string `json:"app_id"`
	Status int    `json:"status"`
	ResID  string `json:"res_id"`
}

WSHeader WebSocket请求头

type WSParameter ¶

type WSParameter struct {
	TTS WSTTS `json:"tts"`
}

WSParameter WebSocket参数

type WSPayload ¶

type WSPayload struct {
	Text struct {
		Encoding string `json:"encoding"`
		Compress string `json:"compress"`
		Format   string `json:"format"`
		Status   int    `json:"status"`
		Seq      int    `json:"seq"`
		Text     string `json:"text"`
	} `json:"text"`
}

WSPayload WebSocket载荷

type WSRequest ¶

type WSRequest struct {
	Header    WSHeader    `json:"header"`
	Parameter WSParameter `json:"parameter"`
	Payload   WSPayload   `json:"payload"`
}

WSRequest WebSocket请求结构

type WSTTS ¶

type WSTTS struct {
	Vcn      string  `json:"vcn"`
	Volume   int     `json:"volume"`
	Rhy      int     `json:"rhy"`
	Pybuffer int     `json:"pybuffer"`
	Speed    int     `json:"speed"`
	Pitch    int     `json:"pitch"`
	Bgs      int     `json:"bgs"`
	Reg      int     `json:"reg"`
	Rdn      int     `json:"rdn"`
	Audio    WSAudio `json:"audio"`
}

WSTTS WebSocket TTS参数

type Word ¶

type Word struct {
	Confidence float64 `json:"confidence"`
	EndTime    int     `json:"end_time"`
	StartTime  int     `json:"start_time"`
	Word       string  `json:"word"`
}

type XunfeiService ¶

type XunfeiService struct {
	// contains filtered or unexported fields
}

func NewXunfeiService ¶

func NewXunfeiService(opt XunfeiTTSConfig) *XunfeiService

NewXunfeiService 创建讯飞TTS服务

func (*XunfeiService) CacheKey ¶

func (xs *XunfeiService) CacheKey(text string) string

func (*XunfeiService) Close ¶

func (xs *XunfeiService) Close() error

func (*XunfeiService) Format ¶

func (xs *XunfeiService) Format() media.StreamFormat

func (*XunfeiService) Provider ¶

func (xs *XunfeiService) Provider() TTSProvider

func (*XunfeiService) Synthesize ¶

func (xs *XunfeiService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error

type XunfeiTTSConfig ¶

type XunfeiTTSConfig struct {
	AppID         string `json:"app_id" yaml:"app_id" env:"XUNFEI_APP_ID"`
	APIKey        string `json:"api_key" yaml:"api_key" env:"XUNFEI_API_KEY"`
	APISecret     string `json:"api_secret" yaml:"api_secret" env:"XUNFEI_API_SECRET"`
	SampleRate    int    `json:"sample_rate" yaml:"sample_rate" default:"24000"`
	Channels      int    `json:"channels" yaml:"channels" default:"1"`
	BitDepth      int    `json:"bit_depth" yaml:"bit_depth" default:"16"`
	Codec         string `json:"codec" yaml:"codec" default:"raw"`
	FrameDuration string `json:"frame_duration" yaml:"frame_duration" default:"20ms"`
	Timeout       int    `json:"timeout" yaml:"timeout" default:"30"`
}

XunfeiTTSConfig 讯飞TTS配置

func NewXunfeiTTSConfig ¶

func NewXunfeiTTSConfig(appID, apiKey, apiSecret string) XunfeiTTSConfig

NewXunfeiTTSConfig 创建讯飞TTS配置

func (*XunfeiTTSConfig) GetProvider ¶

func (c *XunfeiTTSConfig) GetProvider() TTSProvider

GetProvider returns the TTS provider type

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL