synthesizer

package
v1.2.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jun 6, 2026 License: MIT Imports: 38 Imported by: 0

Documentation

Index

Constants

View Source
// MiniMax endpoint and model identifiers.
const (
	// MinimaxWebSocketURL is a secure WebSocket (wss) URL on api.minimaxi.com
	// with path /ws/v1/t2a_v2.
	MinimaxWebSocketURL         = "wss://api.minimaxi.com/ws/v1/t2a_v2"
	// MinimaxSpeech25TurboPreview is the model name "speech-2.5-turbo-preview";
	// the same string is the tagged default of MinimaxOption.Model.
	MinimaxSpeech25TurboPreview = "speech-2.5-turbo-preview"
)
View Source
// TTS provider identifiers. Every value has the form "tts.<vendor>".
//
// NOTE(review): presumably these are the names accepted by
// NewAudioSynthesisEngine / TTSProvider — confirm against the factory code.
// The underscore-separated upper-case names deviate from Go's MixedCaps
// convention; they are exported, so renaming would break callers.
const (
	TTS_QCLOUD            = "tts.qcloud"
	TTS_XUNFEI            = "tts.xunfei"
	TTS_QINIU             = "tts.qiniu"
	TTS_BAIDU             = "tts.baidu"
	TTS_GOOGLE            = "tts.google"
	TTS_AWS               = "tts.aws"
	TTS_AZURE             = "tts.azure"
	TTS_OPENAI            = "tts.openai"
	TTS_ELEVENLABS        = "tts.elevenlabs"
	TTS_LOCAL             = "tts.local"
	TTS_LOCAL_GOSPEECH    = "tts.local_gospeech"
	TTS_FISHSPEECH        = "tts.fishspeech"
	TTS_FISHAUDIO         = "tts.fishaudio"
	TTS_COQUI             = "tts.coqui"
	TTS_VOLCENGINE        = "tts.volcengine"
	TTS_VOLCENGINE_CLONE  = "tts.volcengine_clone"
	TTS_VOLCENGINE_LLM    = "tts.volcengine_llm"
	TTS_VOLCENGINE_STREAM = "tts.volcengine_stream"
	TTS_MINIMAX           = "tts.minimax"
)
View Source
// SSML and Volcengine string constants.
const (
	// SsmlSpeak is the opening SSML root tag "<speak>".
	// NOTE(review): presumably used to detect SSML input — confirm at call sites.
	SsmlSpeak = "<speak>"

	// VolcengineCloneCluster and VolcengineLLMCluster are cluster names.
	// NOTE(review): presumably paired with TTS_VOLCENGINE_CLONE and
	// TTS_VOLCENGINE_LLM respectively — confirm against the Volcengine service code.
	VolcengineCloneCluster = "volcano_icl"
	VolcengineLLMCluster   = "volcano_tts"
)

Variables

This section is empty.

Functions

func CheckLocalTTSAvailable

func CheckLocalTTSAvailable() []string

CheckLocalTTSAvailable 检查本地是否安装了 TTS 工具

func ComputeSampleByteCount

func ComputeSampleByteCount(sampleRate, bitDepth, channels int) int

ComputeSampleByteCount computes the number of bytes for audio samples based on sample rate, bit depth, and number of channels. Formula: (sampleRate * bitDepth * channels) / 8

func DetectLocalTTSCommand

func DetectLocalTTSCommand() string

DetectLocalTTSCommand 自动检测可用的本地 TTS 命令

func GetAzureVoices

func GetAzureVoices() map[string]string

GetAzureVoices 获取可用的 Azure 音色列表(示例)

func GetLocalTTSInfo

func GetLocalTTSInfo() map[string]interface{}

GetLocalTTSInfo 获取本地 TTS 信息

func NormalizeFramePeriod

func NormalizeFramePeriod(d string) time.Duration

NormalizeFramePeriod validates and normalizes the duration string d, using explicit bounds checking.

func SetGlobalSynthesisFactory

func SetGlobalSynthesisFactory(factory SynthesisFactory)

SetGlobalSynthesisFactory 设置全局TTS工厂实例

func StripEmoji

func StripEmoji(text string) string

Types

type AmazonService

type AmazonService struct {
	// contains filtered or unexported fields
}

func NewAmazonService

func NewAmazonService(opt AmazonTTSConfig) *AmazonService

func (*AmazonService) CacheKey

func (as *AmazonService) CacheKey(text string) string

func (*AmazonService) Close

func (as *AmazonService) Close() error

func (*AmazonService) Format

func (as *AmazonService) Format() media.StreamFormat

func (*AmazonService) Provider

func (as *AmazonService) Provider() TTSProvider

func (*AmazonService) Synthesize

func (as *AmazonService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error

type AmazonTTSConfig

// AmazonTTSConfig holds the settings passed to NewAmazonService.
//
// Struct tags: `json` is the serialized key, `env` an environment key and
// `default` a fallback value. NOTE(review): the loader that interprets the
// env/default tags is not visible here — confirm how they are applied.
type AmazonTTSConfig struct {
	// SampleRate is tagged with default 16000.
	SampleRate    int                `json:"sampleRate" env:"sample_rate" default:"16000"`
	// Region carries only a json tag (no env or default tag).
	Region        string             `json:"region"`
	// OutputFormat is tagged with default "pcm".
	OutputFormat  types.OutputFormat `json:"outputFormat" env:"output_format" default:"pcm"`
	// VoiceId has no tagged default.
	VoiceId       types.VoiceId      `json:"voiceId" env:"voice_id"`
	// Channels is tagged with default 1.
	Channels      int                `json:"channels" env:"channels" default:"1"`
	// BitDepth is tagged with default 16.
	BitDepth      int                `json:"bitDepth" env:"bit_depth" default:"16"`
	// FrameDuration is a duration string, tagged with default "20ms".
	FrameDuration string             `json:"frameDuration" env:"frame_duration" default:"20ms"`
}

func NewAmazonTTSOption

func NewAmazonTTSOption(region string, outputFormat types.OutputFormat, voiceId types.VoiceId) AmazonTTSConfig

func (*AmazonTTSConfig) GetProvider

func (c *AmazonTTSConfig) GetProvider() TTSProvider

GetProvider returns the TTS provider type

func (*AmazonTTSConfig) String

func (opt *AmazonTTSConfig) String() string

type AudioAudioSynthesisPlayerRequest

type AudioAudioSynthesisPlayerRequest struct {
	// contains filtered or unexported fields
}

type AudioSynthesisEngine

// AudioSynthesisEngine is the core interface for TTS (text-to-speech)
// synthesis. The service types in this package (for example AmazonService,
// AzureService and MinimaxService) declare this same method set.
type AudioSynthesisEngine interface {
	// Provider reports the engine's TTSProvider.
	Provider() TTSProvider
	// Format reports the media.StreamFormat associated with the engine.
	Format() media.StreamFormat
	// CacheKey returns a string key derived for text.
	// NOTE(review): key composition is implementation-specific — confirm per engine.
	CacheKey(text string) string
	// Synthesize runs synthesis for text, reporting through handler, and
	// returns an error on failure.
	// NOTE(review): cancellation behavior via ctx is not visible here.
	Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error
	// Close shuts the engine down and returns any error from doing so.
	Close() error
}

AudioSynthesisEngine is the core interface for TTS (Text-to-Speech) synthesis.

func NewAudioSynthesisEngine

func NewAudioSynthesisEngine(name string, options map[string]any) (AudioSynthesisEngine, error)

func NewAudioSynthesisEngineFromCredential

func NewAudioSynthesisEngineFromCredential(config TTSCredentialConfig) (AudioSynthesisEngine, error)

NewAudioSynthesisEngineFromCredential 根据凭证配置创建TTS服务

type AudioSynthesisHandler

// AudioSynthesisHandler is the callback interface for TTS synthesis events.
// It is the handler argument of AudioSynthesisEngine.Synthesize, and
// *AudioSynthesisRequest declares both methods.
type AudioSynthesisHandler interface {
	// OnMessage receives a byte slice.
	// NOTE(review): presumably a chunk of synthesized audio — confirm encoding with the engine's Format.
	OnMessage([]byte)
	// OnTimestamp receives a SentenceTimestamp.
	OnTimestamp(timestamp SentenceTimestamp)
}

AudioSynthesisHandler is the callback interface for TTS synthesis events.

type AudioSynthesisPlayer

type AudioSynthesisPlayer struct {
	SenderName string
	Format     media.StreamFormat
	// contains filtered or unexported fields
}

func NewAudioSynthesisPlayer

func NewAudioSynthesisPlayer(vendor string, format media.StreamFormat) *AudioSynthesisPlayer

func (*AudioSynthesisPlayer) Close

func (player *AudioSynthesisPlayer) Close()

func (*AudioSynthesisPlayer) Emit

func (player *AudioSynthesisPlayer) Emit(h media.MediaHandler, audioPacket *media.AudioPacket, inputRate int)

func (*AudioSynthesisPlayer) EmitStopPlayState

func (player *AudioSynthesisPlayer) EmitStopPlayState(h media.MediaHandler, duration string, playId string, sequence int, reason string, sourceText string)

func (*AudioSynthesisPlayer) Interrupt

func (player *AudioSynthesisPlayer) Interrupt(h media.MediaHandler, reason string)

func (*AudioSynthesisPlayer) Run

func (player *AudioSynthesisPlayer) Run(handler media.MediaHandler, ctx context.Context)

type AudioSynthesisRequest

type AudioSynthesisRequest struct {
	PlayID string
	// contains filtered or unexported fields
}

func (*AudioSynthesisRequest) OnMessage

func (req *AudioSynthesisRequest) OnMessage(data []byte)

func (*AudioSynthesisRequest) OnTimestamp

func (req *AudioSynthesisRequest) OnTimestamp(timestamp SentenceTimestamp)

type AzureConfig

// AzureConfig is the Azure TTS configuration passed to NewAzureService.
//
// NOTE(review): the tagged default Codec name says 24 kHz
// ("audio-24khz-48kbitrate-mono-mp3") while SampleRate is tagged with default
// 22050 — confirm which value the service actually uses.
type AzureConfig struct {
	// SubscriptionKey and Region are tagged with env keys
	// AZURE_SUBSCRIPTION_KEY and AZURE_REGION.
	SubscriptionKey string `json:"subscription_key" yaml:"subscription_key" env:"AZURE_SUBSCRIPTION_KEY"`
	Region          string `json:"region" yaml:"region" env:"AZURE_REGION"`
	// Voice is tagged with default "zh-CN-XiaoxiaoNeural".
	Voice           string `json:"voice" yaml:"voice" default:"zh-CN-XiaoxiaoNeural"`
	Language        string `json:"language" yaml:"language"` // language code, used for the SSML xml:lang attribute
	SampleRate      int    `json:"sample_rate" yaml:"sample_rate" default:"22050"`
	Channels        int    `json:"channels" yaml:"channels" default:"1"`
	BitDepth        int    `json:"bit_depth" yaml:"bit_depth" default:"16"`
	Codec           string `json:"codec" yaml:"codec" default:"audio-24khz-48kbitrate-mono-mp3"`
	// FrameDuration is a duration string, tagged with default "20ms".
	FrameDuration   string `json:"frame_duration" yaml:"frame_duration" default:"20ms"`
	// Timeout is tagged with default 30. NOTE(review): unit presumably seconds — confirm.
	Timeout         int    `json:"timeout" yaml:"timeout" default:"30"`
	// BaseURL has no tagged default.
	BaseURL         string `json:"base_url" yaml:"base_url"`
}

AzureConfig Azure TTS配置

func NewAzureConfig

func NewAzureConfig(subscriptionKey, region string) AzureConfig

NewAzureConfig 创建 Azure TTS 配置

func (*AzureConfig) GetProvider

func (c *AzureConfig) GetProvider() TTSProvider

GetProvider returns the TTS provider type

type AzureRequest

// AzureRequest is an Azure TTS API request; it serializes to a JSON object
// with a single "text" key.
type AzureRequest struct {
	Text string `json:"text"`
}

AzureRequest Azure TTS API 请求

type AzureService

type AzureService struct {
	// contains filtered or unexported fields
}

func NewAzureService

func NewAzureService(opt AzureConfig) *AzureService

NewAzureService 创建 Azure TTS 服务

func (*AzureService) CacheKey

func (as *AzureService) CacheKey(text string) string

func (*AzureService) Close

func (as *AzureService) Close() error

func (*AzureService) Format

func (as *AzureService) Format() media.StreamFormat

func (*AzureService) Provider

func (as *AzureService) Provider() TTSProvider

func (*AzureService) Synthesize

func (as *AzureService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error

type BaiduTTSConfig

// BaiduTTSConfig holds the settings passed to NewBaiduService.
//
// Struct tags: `json` is the serialized key, `env` an environment key and
// `default` a fallback value. NOTE(review): the loader that interprets the
// env/default tags is not visible here — confirm how they are applied.
type BaiduTTSConfig struct {
	// Tok is the access token (NewBaiduTTSOption takes a token argument).
	//
	// The tag must carry each key once: a repeated `env` key is rejected by
	// `go vet` (structtag), and reflect.StructTag.Get only ever returns the
	// first value. The first value, "tok", is therefore the one kept here.
	// NOTE(review): if BAIDU_ACCESS_TOKEN is the intended variable name,
	// switch the value deliberately rather than adding a second env key.
	Tok           string `json:"tok" env:"tok"`
	Cuid          string `json:"cuid" env:"cuid"`
	// Ctp is tagged with default "1" and Lan with default "zh".
	Ctp           string `json:"ctp" env:"ctp" default:"1"`
	Lan           string `json:"lan" env:"lan" default:"zh"`
	// Spd, Pit and Vol are each tagged with default "5".
	// NOTE(review): presumably speed, pitch and volume — confirm against the Baidu TTS API.
	Spd           string `json:"spd" env:"spd" default:"5"`
	Pit           string `json:"pit" env:"pit" default:"5"`
	Vol           string `json:"vol" env:"vol" default:"5"`
	// Aue is tagged with default "3".
	// NOTE(review): presumably the audio encoding selector — confirm.
	Aue           string `json:"aue" env:"aue" default:"3"`
	Channels      int    `json:"channels" env:"channels" default:"1"`
	SampleRate    int    `json:"sampleRate" env:"sample_rate" default:"16000"`
	BitDepth      int    `json:"bitDepth" env:"bit_depth" default:"16"`
	// FrameDuration is a duration string, tagged with default "20ms".
	FrameDuration string `json:"frameDuration" env:"frame_duration" default:"20ms"`
}

func NewBaiduTTSOption

func NewBaiduTTSOption(token string) BaiduTTSConfig

func (*BaiduTTSConfig) GetProvider

func (c *BaiduTTSConfig) GetProvider() TTSProvider

GetProvider returns the TTS provider type

func (*BaiduTTSConfig) String

func (opt *BaiduTTSConfig) String() string

type BaiduTTSService

type BaiduTTSService struct {
	// contains filtered or unexported fields
}

func NewBaiduService

func NewBaiduService(opt BaiduTTSConfig) *BaiduTTSService

func (*BaiduTTSService) CacheKey

func (bs *BaiduTTSService) CacheKey(text string) string

func (*BaiduTTSService) Close

func (bs *BaiduTTSService) Close() error

func (*BaiduTTSService) DoubleURLEncode

func (bs *BaiduTTSService) DoubleURLEncode(text string) string

func (*BaiduTTSService) Format

func (bs *BaiduTTSService) Format() media.StreamFormat

func (*BaiduTTSService) Provider

func (bs *BaiduTTSService) Provider() TTSProvider

func (*BaiduTTSService) Synthesize

func (bs *BaiduTTSService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error

type CoquiResponse

// CoquiResponse is decoded from a JSON object with a single "audio" string key.
// NOTE(review): the encoding of Audio (e.g. base64) is not visible here — confirm in CoquiService.
type CoquiResponse struct {
	Audio string `json:"audio"`
}

type CoquiService

type CoquiService struct {
	// contains filtered or unexported fields
}

func NewCoquiService

func NewCoquiService(opt CoquiTTSOption) *CoquiService

func (*CoquiService) CacheKey

func (c *CoquiService) CacheKey(text string) string

func (*CoquiService) Close

func (c *CoquiService) Close() error

func (*CoquiService) Format

func (c *CoquiService) Format() media.StreamFormat

func (*CoquiService) Provider

func (c *CoquiService) Provider() TTSProvider

func (*CoquiService) Synthesize

func (c *CoquiService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error

type CoquiTTSOption

// CoquiTTSOption holds the settings passed to NewCoquiService.
// Note that the json keys are camelCase while the yaml keys are snake_case.
type CoquiTTSOption struct {
	// Url is tagged with env key COQUI_URL; NewCoquiTTSOption takes a url argument.
	Url           string `json:"url" yaml:"url" env:"COQUI_URL"`
	// Language is tagged with default "en_US" and Speaker with default "p226".
	Language      string `json:"language" yaml:"language" default:"en_US"`
	Speaker       string `json:"speaker" yaml:"speaker" default:"p226"`
	SampleRate    int    `json:"sampleRate" yaml:"sample_rate" default:"16000"`
	Channels      int    `json:"channels" yaml:"channels" default:"1"`
	BitDepth      int    `json:"bitDepth" yaml:"bit_depth" default:"16"`
	// FrameDuration is a duration string, tagged with default "20ms".
	FrameDuration string `json:"frameDuration" yaml:"frame_duration" default:"20ms"`
}

func NewCoquiTTSOption

func NewCoquiTTSOption(url string) CoquiTTSOption

func (*CoquiTTSOption) GetProvider

func (c *CoquiTTSOption) GetProvider() TTSProvider

GetProvider returns the TTS provider type

func (*CoquiTTSOption) String

func (opt *CoquiTTSOption) String() string

type DefaultSynthesisFactory

type DefaultSynthesisFactory struct {
	// contains filtered or unexported fields
}

DefaultSynthesisFactory 默认TTS工厂实现

func NewSynthesisFactory

func NewSynthesisFactory() *DefaultSynthesisFactory

NewSynthesisFactory 创建新的TTS工厂实例

func (*DefaultSynthesisFactory) CreateEngine

CreateEngine 创建 AudioSynthesisEngine

func (*DefaultSynthesisFactory) GetSupportedProviders

func (f *DefaultSynthesisFactory) GetSupportedProviders() []TTSProvider

GetSupportedProviders 获取支持的提供商列表

func (*DefaultSynthesisFactory) IsProviderSupported

func (f *DefaultSynthesisFactory) IsProviderSupported(provider TTSProvider) bool

IsProviderSupported 检查提供商是否支持

func (*DefaultSynthesisFactory) RegisterCreator

func (f *DefaultSynthesisFactory) RegisterCreator(provider TTSProvider, creator func(SynthesisConfig) (AudioSynthesisEngine, error))

RegisterCreator 注册创建函数

type ElevenLabsConfig

// ElevenLabsConfig is the ElevenLabs TTS configuration passed to
// NewElevenLabsService.
type ElevenLabsConfig struct {
	// APIKey is tagged with env key ELEVENLABS_API_KEY.
	APIKey        string `json:"api_key" yaml:"api_key" env:"ELEVENLABS_API_KEY"`
	VoiceID       string `json:"voice_id" yaml:"voice_id" default:"21m00Tcm4TlvDq8ikWAM"` // default is the Rachel voice
	ModelID       string `json:"model_id" yaml:"model_id" default:"eleven_monolingual_v1"`
	LanguageCode  string `json:"language_code" yaml:"language_code"` // language code, e.g. en, zh, ja
	SampleRate    int    `json:"sample_rate" yaml:"sample_rate" default:"44100"`
	Channels      int    `json:"channels" yaml:"channels" default:"1"`
	BitDepth      int    `json:"bit_depth" yaml:"bit_depth" default:"16"`
	Codec         string `json:"codec" yaml:"codec" default:"mp3"`
	// FrameDuration is a duration string, tagged with default "20ms".
	FrameDuration string `json:"frame_duration" yaml:"frame_duration" default:"20ms"`
	// Timeout is tagged with default 30. NOTE(review): unit presumably seconds — confirm.
	Timeout       int    `json:"timeout" yaml:"timeout" default:"30"`
	// Voice settings; the same four fields appear in ElevenLabsVoiceSettings.
	Stability       float64 `json:"stability" yaml:"stability" default:"0.5"`                // 0.0-1.0
	SimilarityBoost float64 `json:"similarity_boost" yaml:"similarity_boost" default:"0.75"` // 0.0-1.0
	Style           float64 `json:"style" yaml:"style" default:"0.0"`                        // 0.0-1.0
	UseSpeakerBoost bool    `json:"use_speaker_boost" yaml:"use_speaker_boost" default:"true"`
}

ElevenLabsConfig ElevenLabs TTS配置

func NewElevenLabsConfig

func NewElevenLabsConfig(apiKey, voiceID string) ElevenLabsConfig

NewElevenLabsConfig 创建 ElevenLabs TTS 配置

func (*ElevenLabsConfig) GetProvider

func (c *ElevenLabsConfig) GetProvider() TTSProvider

GetProvider returns the TTS provider type

type ElevenLabsRequest

// ElevenLabsRequest is an ElevenLabs API request body.
// Text is always serialized; the remaining fields are omitted from the JSON
// when empty (ModelID, LanguageCode) or nil (VoiceSettings).
type ElevenLabsRequest struct {
	Text          string                   `json:"text"`
	ModelID       string                   `json:"model_id,omitempty"`
	VoiceSettings *ElevenLabsVoiceSettings `json:"voice_settings,omitempty"`
	LanguageCode  string                   `json:"language_code,omitempty"`
}

ElevenLabsRequest ElevenLabs API 请求

type ElevenLabsService

type ElevenLabsService struct {
	// contains filtered or unexported fields
}

func NewElevenLabsService

func NewElevenLabsService(opt ElevenLabsConfig) *ElevenLabsService

NewElevenLabsService 创建 ElevenLabs TTS 服务

func (*ElevenLabsService) CacheKey

func (es *ElevenLabsService) CacheKey(text string) string

func (*ElevenLabsService) Close

func (es *ElevenLabsService) Close() error

func (*ElevenLabsService) Format

func (es *ElevenLabsService) Format() media.StreamFormat

func (*ElevenLabsService) Provider

func (es *ElevenLabsService) Provider() TTSProvider

func (*ElevenLabsService) Synthesize

func (es *ElevenLabsService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error

type ElevenLabsVoiceSettings

// ElevenLabsVoiceSettings holds voice settings; it is the type of
// ElevenLabsRequest.VoiceSettings. No field uses omitempty, so zero values
// are serialized explicitly.
type ElevenLabsVoiceSettings struct {
	Stability       float64 `json:"stability"`
	SimilarityBoost float64 `json:"similarity_boost"`
	Style           float64 `json:"style"`
	UseSpeakerBoost bool    `json:"use_speaker_boost"`
}

ElevenLabsVoiceSettings 音色设置

type FishAudioConfig

// FishAudioConfig is the Fish Audio TTS configuration passed to
// NewFishAudioService.
type FishAudioConfig struct {
	// APIKey is tagged with env key FISHAUDIO_API_KEY.
	APIKey      string  `json:"api_key" yaml:"api_key" env:"FISHAUDIO_API_KEY"`
	ReferenceID string  `json:"reference_id" yaml:"reference_id" default:""` // model ID
	Model       string  `json:"model" yaml:"model" default:"s1"`             // model version: s1, speech-1.6, speech-1.5
	SampleRate  int     `json:"sample_rate" yaml:"sample_rate" default:"44100"`
	Channels    int     `json:"channels" yaml:"channels" default:"1"`
	BitDepth    int     `json:"bit_depth" yaml:"bit_depth" default:"16"`
	Format      string  `json:"format" yaml:"format" default:"mp3"` // wav, pcm, mp3, opus
	Temperature float64 `json:"temperature" yaml:"temperature" default:"0.7"`
	TopP        float64 `json:"top_p" yaml:"top_p" default:"0.7"`
	Latency     string  `json:"latency" yaml:"latency" default:"normal"` // low, normal, balanced
	ChunkLength int     `json:"chunk_length" yaml:"chunk_length" default:"300"`
	Normalize   bool    `json:"normalize" yaml:"normalize" default:"true"`
	// MPEGBitrate is serialized under the key "mp3_bitrate".
	MPEGBitrate int     `json:"mp3_bitrate" yaml:"mp3_bitrate" default:"128"` // 64, 128, 192
	// Timeout is tagged with default 30. NOTE(review): unit presumably seconds — confirm.
	Timeout     int     `json:"timeout" yaml:"timeout" default:"30"`
}

FishAudioConfig Fish Audio TTS 配置

func NewFishAudioConfig

func NewFishAudioConfig(apiKey, referenceID string) FishAudioConfig

NewFishAudioConfig 创建 Fish Audio TTS 配置

func (*FishAudioConfig) GetProvider

func (c *FishAudioConfig) GetProvider() TTSProvider

GetProvider returns the TTS provider type

type FishAudioListModelsResponse

// FishAudioListModelsResponse is the model-list response returned by the
// Fish Audio API: a "total" count plus an "items" array of voice options.
type FishAudioListModelsResponse struct {
	Total int                    `json:"total"`
	Items []FishAudioVoiceOption `json:"items"`
}

FishAudioListModelsResponse Fish Audio API 返回的模型列表响应

type FishAudioRequest

// FishAudioRequest is a Fish Audio TTS request body.
//
// Text and Model are always serialized. Every other field is tagged
// omitempty, so a zero value (0, 0.0, false, "", nil) is left out of the JSON.
// NOTE(review): this means Normalize=false or Temperature=0 cannot be sent
// explicitly and the server-side default applies — confirm that is intended.
type FishAudioRequest struct {
	Text                      string  `json:"text"`
	Model                     string  `json:"model"`
	ReferenceID               string  `json:"reference_id,omitempty"`
	Temperature               float64 `json:"temperature,omitempty"`
	TopP                      float64 `json:"top_p,omitempty"`
	Format                    string  `json:"format,omitempty"`
	// SampleRate is a pointer, so an explicit 0 can be distinguished from "unset" (nil).
	SampleRate                *int    `json:"sample_rate,omitempty"`
	ChunkLength               int     `json:"chunk_length,omitempty"`
	Normalize                 bool    `json:"normalize,omitempty"`
	Latency                   string  `json:"latency,omitempty"`
	MaxNewTokens              int     `json:"max_new_tokens,omitempty"`
	RepetitionPenalty         float64 `json:"repetition_penalty,omitempty"`
	MinChunkLength            int     `json:"min_chunk_length,omitempty"`
	ConditionOnPreviousChunks bool    `json:"condition_on_previous_chunks,omitempty"`
	EarlyStopThreshold        float64 `json:"early_stop_threshold,omitempty"`
}

FishAudioRequest Fish Audio TTS 请求

type FishAudioService

type FishAudioService struct {
	// contains filtered or unexported fields
}

func NewFishAudioService

func NewFishAudioService(opt FishAudioConfig) *FishAudioService

NewFishAudioService 创建 Fish Audio TTS 服务

func (*FishAudioService) CacheKey

func (fa *FishAudioService) CacheKey(text string) string

func (*FishAudioService) Close

func (fa *FishAudioService) Close() error

func (*FishAudioService) Format

func (fa *FishAudioService) Format() media.StreamFormat

func (*FishAudioService) Provider

func (fa *FishAudioService) Provider() TTSProvider

func (*FishAudioService) Synthesize

func (fa *FishAudioService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error

type FishAudioVoiceOption

// FishAudioVoiceOption describes one Fish Audio voice; it is the element type
// of FishAudioListModelsResponse.Items and of GetFishAudioVoices' result.
type FishAudioVoiceOption struct {
	// ID is read from the JSON key "_id" (leading underscore).
	ID          string   `json:"_id"`
	Title       string   `json:"title"`
	Description string   `json:"description"`
	Type        string   `json:"type"`
	State       string   `json:"state"`
	CoverImage  string   `json:"cover_image"`
	Languages   []string `json:"languages"`
	// Author is the nested "author" object.
	Author      struct {
		ID       string `json:"_id"`
		Nickname string `json:"nickname"`
		Avatar   string `json:"avatar"`
	} `json:"author"`
}

FishAudioVoiceOption Fish Audio 音色选项

func GetFishAudioVoices

func GetFishAudioVoices(apiKey string) ([]FishAudioVoiceOption, error)

GetFishAudioVoices fetches the list of available voices from the Fish Audio API. Parameter apiKey is the Fish Audio API key. It returns the voice list and an error.

type FishSpeechConfig

// FishSpeechConfig is the FishSpeech TTS configuration passed to
// NewFishSpeechService.
type FishSpeechConfig struct {
	// APIKey is tagged with env key FISHSPEECH_API_KEY.
	APIKey        string `json:"api_key" yaml:"api_key" env:"FISHSPEECH_API_KEY"`
	ReferenceID   string `json:"reference_id" yaml:"reference_id" default:"default"` // model ID
	SampleRate    int    `json:"sample_rate" yaml:"sample_rate" default:"24000"`
	Channels      int    `json:"channels" yaml:"channels" default:"1"`
	BitDepth      int    `json:"bit_depth" yaml:"bit_depth" default:"16"`
	Codec         string `json:"codec" yaml:"codec" default:"wav"`
	// FrameDuration is a duration string, tagged with default "20ms".
	FrameDuration string `json:"frame_duration" yaml:"frame_duration" default:"20ms"`
	// Timeout is tagged with default 30. NOTE(review): unit presumably seconds — confirm.
	Timeout       int    `json:"timeout" yaml:"timeout" default:"30"`
	Latency       string `json:"latency" yaml:"latency" default:"normal"` // normal, balanced
	Version       string `json:"version" yaml:"version" default:"s1"`
}

FishSpeechConfig FishSpeech TTS配置

func NewFishSpeechConfig

func NewFishSpeechConfig(apiKey, referenceID string) FishSpeechConfig

NewFishSpeechConfig 创建 FishSpeech TTS 配置

func (*FishSpeechConfig) GetProvider

func (c *FishSpeechConfig) GetProvider() TTSProvider

GetProvider returns the TTS provider type

type FishSpeechListModelsRequest

// FishSpeechListModelsRequest is the FishSpeech list-models request.
// All fields are optional and omitted from the JSON when zero.
type FishSpeechListModelsRequest struct {
	ModelType       string `json:"modelType,omitempty"`       // optional; model type: "public" | "personal" | "all"
	IncludePersonal bool   `json:"includePersonal,omitempty"` // optional; whether to include personal models
	Page            int    `json:"page,omitempty"`            // optional; page number, starting at 1
	PageSize        int    `json:"pageSize,omitempty"`        // optional; number of items per page
}

FishSpeechListModelsRequest FishSpeech 列表模型请求

type FishSpeechListModelsResponse

// FishSpeechListModelsResponse is the model-list response returned by the
// FishSpeech API: paging fields plus an "items" array of voice options.
type FishSpeechListModelsResponse struct {
	Total      int                     `json:"total"`
	Page       int                     `json:"page"`
	PageSize   int                     `json:"pageSize"`
	TotalPages int                     `json:"totalPages"`
	Items      []FishSpeechVoiceOption `json:"items"`
}

FishSpeechListModelsResponse FishSpeech API 返回的模型列表响应

type FishSpeechService

type FishSpeechService struct {
	// contains filtered or unexported fields
}

func NewFishSpeechService

func NewFishSpeechService(opt FishSpeechConfig) *FishSpeechService

NewFishSpeechService 创建 FishSpeech TTS 服务

func (*FishSpeechService) CacheKey

func (fs *FishSpeechService) CacheKey(text string) string

func (*FishSpeechService) Close

func (fs *FishSpeechService) Close() error

func (*FishSpeechService) Format

func (fs *FishSpeechService) Format() media.StreamFormat

func (*FishSpeechService) Provider

func (fs *FishSpeechService) Provider() TTSProvider

func (*FishSpeechService) Synthesize

func (fs *FishSpeechService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error

type FishSpeechV2Request

// FishSpeechV2Request is the WebSocket v2 request message.
// Text is always serialized; the other fields are omitted when empty.
type FishSpeechV2Request struct {
	Text        string `json:"text"`
	ReferenceID string `json:"reference_id,omitempty"`
	Format      string `json:"format,omitempty"`
	Latency     string `json:"latency,omitempty"`
}

FishSpeechV2Request WebSocket v2 请求

type FishSpeechV2Response

// FishSpeechV2Response is the WebSocket v2 response message.
type FishSpeechV2Response struct {
	Type    string `json:"type"`              // "audio" or "error"
	Data    string `json:"data,omitempty"`    // base64-encoded audio data
	Error   string `json:"error,omitempty"`   // error text
	Message string `json:"message,omitempty"` // message text
}

FishSpeechV2Response WebSocket v2 响应

type FishSpeechVoiceOption

// FishSpeechVoiceOption describes one FishSpeech voice; it is the element type
// of FishSpeechListModelsResponse.Items and of GetFishSpeechVoices' result.
type FishSpeechVoiceOption struct {
	ModelID     string `json:"modelId"`     // model ID
	Title       string `json:"title"`       // model name
	Description string `json:"description"` // model description
	IsPersonal  bool   `json:"isPersonal"`  // whether this is a personal model
}

FishSpeechVoiceOption FishSpeech 音色选项

func GetFishSpeechVoices

func GetFishSpeechVoices(apiKey string) ([]FishSpeechVoiceOption, error)

GetFishSpeechVoices fetches the list of available voices from the FishSpeech API. Parameter apiKey is the FishSpeech API key. It returns the voice list and an error.

type GoogleService

type GoogleService struct {
	// contains filtered or unexported fields
}

func NewGoogleService

func NewGoogleService(opt GoogleTTSOption) *GoogleService

func (*GoogleService) CacheKey

func (gs *GoogleService) CacheKey(text string) string

func (*GoogleService) Close

func (gs *GoogleService) Close() error

func (*GoogleService) Format

func (gs *GoogleService) Format() media.StreamFormat

func (*GoogleService) Provider

func (gs *GoogleService) Provider() TTSProvider

func (*GoogleService) Synthesize

func (gs *GoogleService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error

type GoogleTTSOption

// GoogleTTSOption holds the settings passed to NewGoogleService.
// Note that the json keys are camelCase while the yaml keys are snake_case.
type GoogleTTSOption struct {
	// LanguageCode has no tagged default; NewGoogleTTSOption takes it as an argument.
	LanguageCode  string                         `json:"languageCode" yaml:"language_code"`
	SsmlGender    texttospeechpb.SsmlVoiceGender `json:"ssmlGender" yaml:"ssml_gender"`
	// AudioEncoding is tagged with default "LINEAR16".
	AudioEncoding texttospeechpb.AudioEncoding   `json:"audioEncoding" yaml:"audio_encoding" default:"LINEAR16"`
	SampleRate    int                            `json:"sampleRate" yaml:"sample_rate" default:"16000"`
	Channels      int                            `json:"channels" yaml:"channels" default:"1"`
	BitDepth      int                            `json:"bitDepth" yaml:"bit_depth" default:"16"`
	// FrameDuration is a duration string, tagged with default "20ms".
	FrameDuration string                         `json:"frameDuration" yaml:"frame_duration" default:"20ms"`
}

func NewGoogleTTSOption

func NewGoogleTTSOption(languageCode string) GoogleTTSOption

func (*GoogleTTSOption) GetProvider

func (c *GoogleTTSOption) GetProvider() TTSProvider

GetProvider returns the TTS provider type

func (*GoogleTTSOption) String

func (opt *GoogleTTSOption) String() string

type LocalGoSpeechConfig

// LocalGoSpeechConfig is the local TTS configuration used by
// NewLocalGoSpeechService. Unlike the other config structs here it carries no
// `default` tags; NewLocalGoSpeechConfig is documented as creating the default
// configuration.
type LocalGoSpeechConfig struct {
	Provider    LocalGoSpeechProvider `json:"provider"`    // TTS provider
	ModelPath   string                `json:"modelPath"`   // model file path (optional)
	Language    string                `json:"language"`    // language code
	Speaker     string                `json:"speaker"`     // speaker
	SampleRate  int                   `json:"sampleRate"`  // sample rate
	Channels    int                   `json:"channels"`    // number of channels
	BitDepth    int                   `json:"bitDepth"`    // bit depth
	Speed       float32               `json:"speed"`       // speech rate
	Pitch       float32               `json:"pitch"`       // pitch
	Volume      float32               `json:"volume"`      // volume
	EnableCache bool                  `json:"enableCache"` // whether caching is enabled
	// As a time.Duration, encoding/json serializes this as an integer number of nanoseconds.
	CacheExpiry time.Duration         `json:"cacheExpiry"` // cache expiry
	Command     string                `json:"command"`     // custom command
	OutputDir   string                `json:"outputDir"`   // output directory
}

LocalGoSpeechConfig 本地TTS配置

func NewLocalGoSpeechConfig

func NewLocalGoSpeechConfig(provider LocalGoSpeechProvider, modelPath string) *LocalGoSpeechConfig

NewLocalGoSpeechConfig 创建默认本地TTS配置

func (*LocalGoSpeechConfig) GetProvider

func (c *LocalGoSpeechConfig) GetProvider() TTSProvider

GetProvider returns the TTS provider type

type LocalGoSpeechProvider

// LocalGoSpeechProvider is the local TTS provider type, a string naming the
// backend selected in LocalGoSpeechConfig.Provider.
type LocalGoSpeechProvider string

LocalGoSpeechProvider 本地TTS提供商类型

// Known LocalGoSpeechProvider values.
// NOTE(review): presumably each names a local command-line TTS tool — confirm
// against LocalGoSpeechService.
const (
	LocalGoSpeechProviderEspeak   LocalGoSpeechProvider = "espeak"
	LocalGoSpeechProviderSay      LocalGoSpeechProvider = "say"
	LocalGoSpeechProviderFestival LocalGoSpeechProvider = "festival"
	LocalGoSpeechProviderPico     LocalGoSpeechProvider = "pico"
)

type LocalGoSpeechService

type LocalGoSpeechService struct {
	// contains filtered or unexported fields
}

LocalGoSpeechService 本地TTS服务

func NewLocalGoSpeechService

func NewLocalGoSpeechService(config *LocalGoSpeechConfig) (*LocalGoSpeechService, error)

NewLocalGoSpeechService 创建本地TTS服务

func (*LocalGoSpeechService) CacheKey

func (s *LocalGoSpeechService) CacheKey(text string) string

CacheKey 生成缓存键

func (*LocalGoSpeechService) Close

func (s *LocalGoSpeechService) Close() error

Close 关闭服务

func (*LocalGoSpeechService) Format

Format 返回音频格式

func (*LocalGoSpeechService) GetConfig

func (s *LocalGoSpeechService) GetConfig() *LocalGoSpeechConfig

GetConfig 获取配置

func (*LocalGoSpeechService) GetSupportedLanguages

func (s *LocalGoSpeechService) GetSupportedLanguages() []string

GetSupportedLanguages 获取支持的语言列表

func (*LocalGoSpeechService) GetSupportedSpeakers

func (s *LocalGoSpeechService) GetSupportedSpeakers() []string

GetSupportedSpeakers 获取支持的发音人列表

func (*LocalGoSpeechService) IsReady

func (s *LocalGoSpeechService) IsReady() bool

IsReady 检查服务是否就绪

func (*LocalGoSpeechService) Provider

func (s *LocalGoSpeechService) Provider() TTSProvider

Provider 返回提供商

func (*LocalGoSpeechService) Synthesize

func (s *LocalGoSpeechService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error

Synthesize 合成语音

func (*LocalGoSpeechService) UpdateConfig

func (s *LocalGoSpeechService) UpdateConfig(config *LocalGoSpeechConfig) error

UpdateConfig 更新配置

type LocalService

type LocalService struct {
	// contains filtered or unexported fields
}

func NewLocalService

func NewLocalService(opt LocalTTSConfig) *LocalService

NewLocalService 创建本地TTS服务

func (*LocalService) CacheKey

func (ls *LocalService) CacheKey(text string) string

func (*LocalService) Close

func (ls *LocalService) Close() error

func (*LocalService) Format

func (ls *LocalService) Format() media.StreamFormat

func (*LocalService) Provider

func (ls *LocalService) Provider() TTSProvider

func (*LocalService) Synthesize

func (ls *LocalService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error

type LocalTTSConfig

// LocalTTSConfig is the local TTS configuration passed to NewLocalService.
type LocalTTSConfig struct {
	Command       string `json:"command" yaml:"command" default:"say"`           // TTS command (e.g. say, festival, espeak)
	Voice         string `json:"voice" yaml:"voice" default:""`                  // voice (optional)
	SampleRate    int    `json:"sample_rate" yaml:"sample_rate" default:"16000"` // sample rate
	Channels      int    `json:"channels" yaml:"channels" default:"1"`           // number of channels
	BitDepth      int    `json:"bit_depth" yaml:"bit_depth" default:"16"`        // bit depth
	Codec         string `json:"codec" yaml:"codec" default:"wav"`               // audio codec
	// FrameDuration is a duration string, tagged with default "20ms".
	FrameDuration string `json:"frame_duration" yaml:"frame_duration" default:"20ms"`
	// NOTE(review): the tagged default "/tmp" is a Unix path — confirm behavior on other platforms.
	OutputDir     string `json:"output_dir" yaml:"output_dir" default:"/tmp"` // output directory
}

LocalTTSConfig 本地TTS配置

func NewLocalTTSConfig

func NewLocalTTSConfig(command string) LocalTTSConfig

NewLocalTTSConfig 创建本地TTS配置

func (*LocalTTSConfig) GetProvider

func (c *LocalTTSConfig) GetProvider() TTSProvider

GetProvider returns the TTS provider type

type MinimaxAudioSetting

// MinimaxAudioSetting is the "audio_setting" object of MinimaxTaskStartRequest.
type MinimaxAudioSetting struct {
	SampleRate int    `json:"sample_rate"`
	// Bitrate is a pointer with omitempty: a nil value is left out of the JSON.
	Bitrate    *int   `json:"bitrate,omitempty"`
	Format     string `json:"format"`
	// Channel is serialized under the singular key "channel".
	Channel    int    `json:"channel"`
}

type MinimaxConnectionResponse

// MinimaxConnectionResponse is a MiniMax response message carrying session
// and trace identifiers, an event name and a base_resp status object.
// NOTE(review): presumably the first message received after the WebSocket
// connects — confirm in MinimaxService.
type MinimaxConnectionResponse struct {
	SessionID string `json:"session_id"`
	Event     string `json:"event"`
	TraceID   string `json:"trace_id"`
	// BaseResp is the nested "base_resp" status code and message.
	BaseResp  struct {
		StatusCode int    `json:"status_code"`
		StatusMsg  string `json:"status_msg"`
	} `json:"base_resp"`
}

type MinimaxOption

// MinimaxOption holds the settings passed to NewMinimaxService.
// Note that the json keys are camelCase while the yaml keys are snake_case.
type MinimaxOption struct {
	// Model's tagged default equals the MinimaxSpeech25TurboPreview constant.
	Model         string  `json:"model" yaml:"model" default:"speech-2.5-turbo-preview"`
	// APIKey is tagged with env key MINIMAX_API_KEY.
	APIKey        string  `json:"apiKey" yaml:"api_key" env:"MINIMAX_API_KEY"`
	VoiceID       string  `json:"voiceId" yaml:"voice_id" default:"male-qn-qingse"`
	SpeedRatio    float64 `json:"speedRatio" yaml:"speed_ratio" default:"1.0"`
	Volume        float64 `json:"volume" yaml:"volume" default:"1.0"`
	Pitch         float64 `json:"pitch" yaml:"pitch" default:"0.0"`
	Emotion       string  `json:"emotion" yaml:"emotion" default:"neutral"`
	LanguageBoost string  `json:"languageBoost" yaml:"language_boost" default:"auto"`
	TrainingTimes int     `json:"trainingTimes" yaml:"training_times" default:"1"`

	// Audio output settings.
	SampleRate    int    `json:"sampleRate" yaml:"sample_rate" default:"8000"`
	// NOTE(review): the tagged default 16 looks like a bit depth rather than a
	// bitrate — confirm the unit expected by the MiniMax API.
	Bitrate       int    `json:"bitrate" yaml:"bitrate" default:"16"`
	Format        string `json:"format" yaml:"format" default:"pcm"`
	Channels      int    `json:"channels" yaml:"channels" default:"1"`
	// FrameDuration is a duration string, tagged with default "20ms".
	FrameDuration string `json:"frameDuration" yaml:"frame_duration" default:"20ms"`
}

func NewMinimaxOption

func NewMinimaxOption(apiKey string) MinimaxOption

func (*MinimaxOption) GetProvider

func (c *MinimaxOption) GetProvider() TTSProvider

GetProvider returns the TTS provider type

func (*MinimaxOption) String

func (opt *MinimaxOption) String() string

type MinimaxPronunciationDict

// MinimaxPronunciationDict is an empty struct; it is the pointee type of the
// optional MinimaxTaskStartRequest.PronunciationDict field and serializes as
// an empty JSON object when non-nil.
type MinimaxPronunciationDict struct {
}

type MinimaxService

type MinimaxService struct {
	ConnSessionID string
	TraceID       string
	// contains filtered or unexported fields
}

func NewMinimaxService

func NewMinimaxService(opt MinimaxOption) *MinimaxService

func (*MinimaxService) CacheKey

func (ms *MinimaxService) CacheKey(text string) string

func (*MinimaxService) Close

func (ms *MinimaxService) Close() error

func (*MinimaxService) Format

func (ms *MinimaxService) Format() media.StreamFormat

func (*MinimaxService) GetConnSessionID

func (ms *MinimaxService) GetConnSessionID() string

func (*MinimaxService) GetTraceID

func (ms *MinimaxService) GetTraceID() string

func (*MinimaxService) Provider

func (ms *MinimaxService) Provider() TTSProvider

func (*MinimaxService) Synthesize

func (ms *MinimaxService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error

type MinimaxTaskContinueResponse

// MinimaxTaskContinueResponse is the JSON shape of a Minimax message that
// delivers synthesized audio together with session, event and status data.
type MinimaxTaskContinueResponse struct {
	// Data wraps the audio payload, which arrives as a string.
	// NOTE(review): the text encoding of Audio (hex, base64, ...) is not
	// visible here — confirm against the decoder in Synthesize.
	Data struct {
		Audio string `json:"audio"`
	} `json:"data"`
	SessionID string `json:"session_id"`
	Event     string `json:"event"`
	// IsFinal is decoded from "is_final".
	IsFinal   bool   `json:"is_final"`
	TraceID   string `json:"trace_id"`
	// BaseResp mirrors the "base_resp" object: a numeric status code and
	// its accompanying message.
	BaseResp  struct {
		StatusCode int    `json:"status_code"`
		StatusMsg  string `json:"status_msg"`
	} `json:"base_resp"`
}

type MinimaxTaskStartRequest

// MinimaxTaskStartRequest is the JSON request that bundles the event name,
// model, voice settings and audio settings for a Minimax synthesis task.
type MinimaxTaskStartRequest struct {
	Event             string                    `json:"event"`
	Model             string                    `json:"model"`
	VoiceSetting      MinimaxVoiceSetting       `json:"voice_setting"`
	AudioSetting      MinimaxAudioSetting       `json:"audio_setting"`
	// PronunciationDict is optional; a nil pointer is omitted from the JSON.
	PronunciationDict *MinimaxPronunciationDict `json:"pronunciation_dict,omitempty"`
	// LanguageBoost is omitted from the JSON when empty.
	LanguageBoost     string                    `json:"language_boost,omitempty"`
}

type MinimaxTaskStartResponse

// MinimaxTaskStartResponse is the JSON shape of a Minimax message that
// carries only an event name and a status block.
type MinimaxTaskStartResponse struct {
	Event    string `json:"event"`
	// BaseResp mirrors the "base_resp" object: a numeric status code and
	// its accompanying message.
	BaseResp struct {
		StatusCode int    `json:"status_code"`
		StatusMsg  string `json:"status_msg"`
	} `json:"base_resp"`
}

type MinimaxTimbreWeight

// MinimaxTimbreWeight pairs a voice ID with an integer weight. It is the
// element type of MinimaxVoiceSetting.TimbreWeights.
type MinimaxTimbreWeight struct {
	VoiceID string `json:"voice_id"`
	Weight  int    `json:"weight"`
}

type MinimaxVoiceSetting

// MinimaxVoiceSetting holds the voice parameters of a Minimax request. It is
// carried in MinimaxTaskStartRequest under the "voice_setting" key.
type MinimaxVoiceSetting struct {
	// VoiceID, Weight, TimbreWeights and ToneList are optional: each is
	// dropped from the JSON when it holds its zero value (omitempty).
	VoiceID       string                `json:"voice_id,omitempty"`
	Weight        int                   `json:"weight,omitempty"`
	TimbreWeights []MinimaxTimbreWeight `json:"timbre_weights,omitempty"`
	// Speed, Volume, Pitch and Emotion are always serialized, even when zero.
	Speed         float64               `json:"speed"`
	// Volume uses the abbreviated wire name "vol".
	Volume        float64               `json:"vol"`
	Pitch         float64               `json:"pitch"`
	Emotion       string                `json:"emotion"`
	// ToneList uses the single-word wire name "tonelist".
	ToneList      []string              `json:"tonelist,omitempty"`
}

type OpenAIConfig

// OpenAIConfig is the configuration for the OpenAI TTS provider.
//
// JSON and YAML names are both snake_case. The `default` and `env` struct
// tags only record the intended default value and environment variable; the
// code that applies them is not part of this declaration.
type OpenAIConfig struct {
	// APIKey may be supplied through the OPENAI_API_KEY environment variable.
	APIKey        string  `json:"api_key" yaml:"api_key" env:"OPENAI_API_KEY"`
	Model         string  `json:"model" yaml:"model" default:"tts-1"`
	Voice         string  `json:"voice" yaml:"voice" default:"alloy"`
	Speed         float64 `json:"speed" yaml:"speed" default:"1.0"`
	SampleRate    int     `json:"sample_rate" yaml:"sample_rate" default:"24000"`
	Channels      int     `json:"channels" yaml:"channels" default:"1"`
	BitDepth      int     `json:"bit_depth" yaml:"bit_depth" default:"16"`
	Codec         string  `json:"codec" yaml:"codec" default:"mp3"`
	// FrameDuration is kept as a string such as "20ms".
	FrameDuration string  `json:"frame_duration" yaml:"frame_duration" default:"20ms"`
	// NOTE(review): Timeout is a bare integer; presumably seconds — confirm.
	Timeout       int     `json:"timeout" yaml:"timeout" default:"30"`
	BaseURL       string  `json:"base_url" yaml:"base_url" default:"https://api.openai.com"`
}

OpenAIConfig OpenAI TTS配置

func NewOpenAIConfig

func NewOpenAIConfig(apiKey string) OpenAIConfig

NewOpenAIConfig 创建 OpenAI TTS 配置

func (*OpenAIConfig) GetProvider

func (c *OpenAIConfig) GetProvider() TTSProvider

GetProvider returns the TTS provider type

type OpenAIRequest

// OpenAIRequest is the JSON request body sent to the OpenAI API.
type OpenAIRequest struct {
	Model          string  `json:"model"`
	// Input is serialized as "input".
	Input          string  `json:"input"`
	Voice          string  `json:"voice"`
	// ResponseFormat and Speed are optional: an empty string or a zero
	// speed is left out of the JSON (omitempty).
	ResponseFormat string  `json:"response_format,omitempty"`
	Speed          float64 `json:"speed,omitempty"`
}

OpenAIRequest OpenAI API 请求

type OpenAIService

type OpenAIService struct {
	// contains filtered or unexported fields
}

func NewOpenAIService

func NewOpenAIService(opt OpenAIConfig) *OpenAIService

NewOpenAIService 创建 OpenAI TTS 服务

func (*OpenAIService) CacheKey

func (os *OpenAIService) CacheKey(text string) string

func (*OpenAIService) Close

func (os *OpenAIService) Close() error

func (*OpenAIService) Format

func (os *OpenAIService) Format() media.StreamFormat

func (*OpenAIService) Provider

func (os *OpenAIService) Provider() TTSProvider

func (*OpenAIService) Synthesize

func (os *OpenAIService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error

type PlayRecord

type PlayRecord struct {
	// contains filtered or unexported fields
}

type QCloudService

type QCloudService struct {
	// contains filtered or unexported fields
}

func NewQCloudService

func NewQCloudService(opt QCloudTTSConfig) *QCloudService

func (*QCloudService) CacheKey

func (qs *QCloudService) CacheKey(text string) string

func (*QCloudService) Close

func (qs *QCloudService) Close() error

func (*QCloudService) Format

func (qs *QCloudService) Format() media.StreamFormat

func (*QCloudService) Provider

func (qs *QCloudService) Provider() TTSProvider

func (*QCloudService) Synthesize

func (qs *QCloudService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error

type QCloudTTSConfig

// QCloudTTSConfig is the configuration for the Tencent Cloud (QCloud) TTS
// provider.
//
// The `default` and `env` struct tags only record the intended default value
// and environment variable; the code that applies them is not part of this
// declaration.
type QCloudTTSConfig struct {
	// Credentials; each may be supplied through the environment variable
	// named in its `env` tag. Note that SecretKey is serialized as "secret".
	AppID         int64  `json:"appId" yaml:"app_id" env:"QCLOUD_APP_ID"`
	SecretID      string `json:"secretId" yaml:"secret_id" env:"QCLOUD_SECRET_ID"`
	SecretKey     string `json:"secret" yaml:"secret" env:"QCLOUD_SECRET"`
	VoiceType     int64  `json:"voiceType" yaml:"voice_type" default:"1005"`
	ModelType     int64  `json:"modelType" yaml:"model_type" default:"1"`
	Language      string `json:"language" yaml:"language"` // Language code, e.g. zh-CN, en-US (Tencent Cloud selects the language via the voice type; this field is used for configuration and caching)
	SampleRate    int    `json:"sampleRate" yaml:"sample_rate" default:"8000"`
	Channels      int    `json:"channels" yaml:"channels" default:"1"`
	BitDepth      int    `json:"bitDepth" yaml:"bit_depth" default:"16"`
	Codec         string `json:"codec" yaml:"codec" default:"pcm"`
	FrameDuration string `json:"frameDuration" yaml:"frame_duration" default:"20ms"`
	// Speed is Tencent TTS speed level (typically -2~6, 0 means default).
	Speed int64 `json:"speed" yaml:"speed" default:"0"`
}

QCloudTTSConfig is the Tencent Cloud TTS configuration.

func NewQcloudTTSConfig

func NewQcloudTTSConfig(appId string, secretId string, secretKey string, voiceType int64, codec string, sample int) QCloudTTSConfig

func (*QCloudTTSConfig) GetProvider

func (c *QCloudTTSConfig) GetProvider() TTSProvider

GetProvider returns the TTS provider type

func (*QCloudTTSConfig) ToString

func (opt *QCloudTTSConfig) ToString() string

type QiniuService

type QiniuService struct {
	// contains filtered or unexported fields
}

func NewQiniuService

func NewQiniuService(opt QiniuTTSConfig) *QiniuService

NewQiniuService 创建七牛云TTS服务

func (*QiniuService) CacheKey

func (qs *QiniuService) CacheKey(text string) string

func (*QiniuService) Close

func (qs *QiniuService) Close() error

func (*QiniuService) Format

func (qs *QiniuService) Format() media.StreamFormat

func (*QiniuService) Provider

func (qs *QiniuService) Provider() TTSProvider

func (*QiniuService) Synthesize

func (qs *QiniuService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error

type QiniuTTSConfig

// QiniuTTSConfig is the configuration for the Qiniu Cloud TTS provider.
//
// The `default` and `env` struct tags only record the intended default value
// and environment variable; the code that applies them is not part of this
// declaration.
type QiniuTTSConfig struct {
	// APIKey and BaseURL may be supplied through QINIU_TTS_API_KEY and
	// QINIU_TTS_BASE_URL; BaseURL has no default tag.
	APIKey        string `json:"api_key" yaml:"api_key" env:"QINIU_TTS_API_KEY"`
	BaseURL       string `json:"base_url" yaml:"base_url" env:"QINIU_TTS_BASE_URL"`
	VoiceType     string `json:"voice_type" yaml:"voice_type" default:"female_cn_001"`
	SampleRate    int    `json:"sample_rate" yaml:"sample_rate" default:"16000"`
	Channels      int    `json:"channels" yaml:"channels" default:"1"`
	BitDepth      int    `json:"bit_depth" yaml:"bit_depth" default:"16"`
	Codec         string `json:"codec" yaml:"codec" default:"pcm"`
	FrameDuration string `json:"frame_duration" yaml:"frame_duration" default:"20ms"`
	// NOTE(review): Timeout is a bare integer; presumably seconds — confirm.
	Timeout       int    `json:"timeout" yaml:"timeout" default:"30"`
	Retries       int    `json:"retries" yaml:"retries" default:"0"`
}

QiniuTTSConfig 七牛云TTS配置

func NewQiniuTTSConfig

func NewQiniuTTSConfig(apiKey, baseURL string) QiniuTTSConfig

NewQiniuTTSConfig 创建七牛云TTS配置

func (*QiniuTTSConfig) GetProvider

func (c *QiniuTTSConfig) GetProvider() TTSProvider

GetProvider returns the TTS provider type

type QiniuTTSRequest

// QiniuTTSRequest is the Qiniu Cloud TTS request structure: an audio
// configuration object plus the request data that carries the text.
type QiniuTTSRequest struct {
	Audio   TTSAudio       `json:"audio"`
	Request TTSRequestData `json:"request"`
}

QiniuTTSRequest 七牛云TTS请求结构

type QiniuTTSResponse

// QiniuTTSResponse is the Qiniu Cloud TTS response structure.
type QiniuTTSResponse struct {
	Reqid     string       `json:"reqid"`
	Operation string       `json:"operation"`
	Sequence  int          `json:"sequence"`
	// Data arrives as a string.
	// NOTE(review): presumably encoded audio (e.g. base64) — confirm
	// against the decoder in QiniuService.Synthesize.
	Data      string       `json:"data"`
	// Addition is optional and may be nil.
	Addition  *TTSAddition `json:"addition,omitempty"`
}

QiniuTTSResponse 七牛云TTS响应结构

type SentenceTimestamp

// SentenceTimestamp groups the per-word timing entries of one sentence.
type SentenceTimestamp struct {
	Words []Word `json:"words"`
}

type SynthesisBuffer

// SynthesisBuffer holds a byte buffer and a sentence timestamp. It exposes
// OnMessage and OnTimestamp methods.
// NOTE(review): presumably it satisfies AudioSynthesisHandler and
// accumulates synthesis output in memory — confirm against the method bodies.
type SynthesisBuffer struct {
	Data      []byte
	Timestamp SentenceTimestamp
}

func (*SynthesisBuffer) OnMessage

func (s *SynthesisBuffer) OnMessage(data []byte)

func (*SynthesisBuffer) OnTimestamp

func (s *SynthesisBuffer) OnTimestamp(timestamp SentenceTimestamp)

type SynthesisConfig

// SynthesisConfig is the unified TTS configuration interface. Any provider
// configuration that can report its provider type satisfies it.
type SynthesisConfig interface {
	// GetProvider returns the TTS provider type of this configuration.
	GetProvider() TTSProvider
}

SynthesisConfig 统一的TTS配置接口

type SynthesisFactory

// SynthesisFactory is the TTS factory interface: it builds synthesis engines
// from provider configurations and keeps a registry of creator functions.
type SynthesisFactory interface {
	// CreateEngine creates an AudioSynthesisEngine from the given configuration.
	CreateEngine(config SynthesisConfig) (AudioSynthesisEngine, error)
	// GetSupportedProviders returns the list of supported providers.
	GetSupportedProviders() []TTSProvider
	// IsProviderSupported reports whether the given provider is supported.
	IsProviderSupported(provider TTSProvider) bool
	// RegisterCreator registers the creator function for a provider.
	RegisterCreator(provider TTSProvider, creator func(SynthesisConfig) (AudioSynthesisEngine, error))
}

SynthesisFactory TTS工厂接口

func GetGlobalSynthesisFactory

func GetGlobalSynthesisFactory() SynthesisFactory

GetGlobalSynthesisFactory 获取全局TTS工厂实例

type TTSAddition

// TTSAddition is the additional information attached to a TTS response
// (see QiniuTTSResponse.Addition).
type TTSAddition struct {
	// Duration is delivered as a string; its unit is not visible here.
	Duration string `json:"duration"`
}

TTSAddition TTS附加信息

type TTSAudio

// TTSAudio is the audio configuration part of a TTS request
// (see QiniuTTSRequest.Audio).
type TTSAudio struct {
	VoiceType  string  `json:"voice_type"`
	Encoding   string  `json:"encoding"`
	// SpeedRatio is optional: a zero value is left out of the JSON.
	SpeedRatio float64 `json:"speed_ratio,omitempty"`
}

TTSAudio TTS音频配置

type TTSCredentialConfig

// TTSCredentialConfig is a TTS credential configuration expressed as
// free-form key/value pairs rather than a fixed struct.
type TTSCredentialConfig map[string]interface{}

TTSCredentialConfig TTS凭证配置结构(灵活的键值对配置)

type TTSProvider

// TTSProvider identifies a TTS service provider by its string name.
type TTSProvider string

TTSProvider TTS服务提供商类型

// Known TTS providers. Each value is the provider's short string name.
const (
	// ProviderQiniu is Qiniu Cloud TTS.
	ProviderQiniu TTSProvider = "qiniu"
	// ProviderXunfei is iFlytek (Xunfei) TTS.
	ProviderXunfei TTSProvider = "xunfei"
	// ProviderAliyun is Alibaba Cloud TTS.
	ProviderAliyun TTSProvider = "aliyun"
	// ProviderTencent is Tencent Cloud TTS; note that its value is "qcloud".
	ProviderTencent TTSProvider = "qcloud"
	// ProviderBaidu is Baidu TTS.
	ProviderBaidu TTSProvider = "baidu"
	// ProviderAzure is Microsoft Azure TTS.
	ProviderAzure TTSProvider = "azure"
	// ProviderGoogle is Google Cloud TTS.
	ProviderGoogle TTSProvider = "google"
	// ProviderAWS is Amazon Polly TTS.
	ProviderAWS TTSProvider = "aws"
	// ProviderOpenAI is OpenAI TTS.
	ProviderOpenAI TTSProvider = "openai"
	// ProviderElevenLabs is ElevenLabs TTS.
	ProviderElevenLabs TTSProvider = "elevenlabs"
	// ProviderLocal is local TTS.
	ProviderLocal TTSProvider = "local"
	// ProviderLocalGoSpeech is local go-speech TTS.
	ProviderLocalGoSpeech TTSProvider = "local_gospeech"
	// ProviderFishSpeech is FishSpeech TTS.
	ProviderFishSpeech TTSProvider = "fishspeech"
	// ProviderFishAudio is Fish Audio TTS.
	ProviderFishAudio TTSProvider = "fishaudio"
	// ProviderCoqui is Coqui TTS.
	ProviderCoqui TTSProvider = "coqui"
	// ProviderVolcengine is Volcengine standard TTS.
	ProviderVolcengine TTSProvider = "volcengine"
	// ProviderMinimax is Minimax TTS.
	ProviderMinimax TTSProvider = "minimax"
)

func (TTSProvider) ToString

func (tp TTSProvider) ToString() string

type TTSRequestData

// TTSRequestData is the request data of a TTS request; it carries the text
// (see QiniuTTSRequest.Request).
type TTSRequestData struct {
	Text string `json:"text"`
}

TTSRequestData TTS请求数据

type VolcAddition

// VolcAddition is the additional information attached to a Volcengine
// response (see VolcengineTTSServResponse.Addition).
type VolcAddition struct {
	// Frontend is delivered as a string; its content format is not visible here.
	Frontend string `json:"frontend"`
}

VolcAddition 火山引擎附加信息

type VolcengineService

type VolcengineService struct {
	// contains filtered or unexported fields
}

VolcengineService 火山引擎标准TTS服务

func NewVolcengineService

func NewVolcengineService(opt VolcengineTTSOption) *VolcengineService

NewVolcengineService 创建火山引擎TTS服务

func (*VolcengineService) CacheKey

func (v *VolcengineService) CacheKey(text string) string

func (*VolcengineService) Close

func (v *VolcengineService) Close() error

func (*VolcengineService) Format

func (v *VolcengineService) Format() media.StreamFormat

func (*VolcengineService) Provider

func (v *VolcengineService) Provider() TTSProvider

func (*VolcengineService) Synthesize

func (v *VolcengineService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error

type VolcengineTTSOption

// VolcengineTTSOption is the configuration for the Volcengine standard TTS
// provider. The defaults mentioned in the field comments are documented
// values; the code that applies them is not part of this declaration.
type VolcengineTTSOption struct {
	AppID         string  `json:"appID"`         // Application ID
	AccessToken   string  `json:"accessToken"`   // Access token
	Cluster       string  `json:"cluster"`       // Cluster name, e.g. volcano_tts
	VoiceType     string  `json:"voiceType"`     // Voice type, e.g. BV700_streaming
	Rate          int     `json:"rate"`          // Sample rate, default 8000
	Encoding      string  `json:"encoding"`      // Encoding format, default pcm
	SpeedRatio    float32 `json:"speedRatio"`    // Speed ratio, default 1.0
	VolumeRatio   float32 `json:"volumeRatio"`   // Volume ratio, default 1.0
	PitchRatio    float32 `json:"pitchRatio"`    // Pitch ratio, default 1.0
	Channels      int     `json:"channels"`      // Number of channels, default 1
	BitDepth      int     `json:"bitDepth"`      // Bit depth, default 16
	FrameDuration string  `json:"frameDuration"` // Frame duration, default 20ms
	TextType      string  `json:"textType"`      // Text type, plain or ssml
	Ssml          bool    `json:"ssml"`          // Whether to use SSML
}

VolcengineTTSOption 火山引擎标准TTS配置。

支持的常用音色类型(VoiceType):
- BV700_streaming: 默认音色
- BV700_V2_streaming: V2版本
- BV213_streaming: 广西老表(男声)
- BV025_streaming: 甜美台妹(女声)

更多音色类型请参考火山引擎官方文档。

func NewVolcengineTTSOption

func NewVolcengineTTSOption(appID, accessToken, cluster string) VolcengineTTSOption

NewVolcengineTTSOption 创建火山引擎TTS配置

func (*VolcengineTTSOption) GetProvider

func (c *VolcengineTTSOption) GetProvider() TTSProvider

GetProvider returns the TTS provider type

type VolcengineTTSServResponse

// VolcengineTTSServResponse is the Volcengine TTS response structure.
type VolcengineTTSServResponse struct {
	ReqID     string       `json:"reqid"`
	// Code and Message carry the result status reported in the response.
	Code      int          `json:"code"`
	Message   string       `json:"message"`
	Operation string       `json:"operation"`
	Sequence  int          `json:"sequence"`
	// Data arrives as a string.
	// NOTE(review): presumably encoded audio (e.g. base64) — confirm
	// against the decoder in VolcengineService.Synthesize.
	Data      string       `json:"data"`
	Addition  VolcAddition `json:"addition"`
}

VolcengineTTSServResponse 火山引擎TTS响应结构

type WSAudio

// WSAudio holds the WebSocket audio parameters: encoding and sample rate.
// It is nested in WSTTS under the "audio" key.
type WSAudio struct {
	Encoding   string `json:"encoding"`
	SampleRate int    `json:"sample_rate"`
}

WSAudio WebSocket音频参数

type WSHeader

// WSHeader is the header section of a WebSocket request (WSRequest.Header).
type WSHeader struct {
	AppID  string `json:"app_id"`
	// Status is an integer code; its allowed values are not visible here.
	Status int    `json:"status"`
	ResID  string `json:"res_id"`
}

WSHeader WebSocket请求头

type WSParameter

// WSParameter is the parameter section of a WebSocket request
// (WSRequest.Parameter); it wraps the TTS settings under the "tts" key.
type WSParameter struct {
	TTS WSTTS `json:"tts"`
}

WSParameter WebSocket参数

type WSPayload

// WSPayload is the payload section of a WebSocket request
// (WSRequest.Payload).
type WSPayload struct {
	// Text describes the text being submitted: its encoding, compression,
	// format, status and sequence number, along with the text itself.
	// NOTE(review): whether the inner Text field holds plain or encoded
	// text is not visible here — confirm against the request builder.
	Text struct {
		Encoding string `json:"encoding"`
		Compress string `json:"compress"`
		Format   string `json:"format"`
		Status   int    `json:"status"`
		Seq      int    `json:"seq"`
		Text     string `json:"text"`
	} `json:"text"`
}

WSPayload WebSocket载荷

type WSRequest

// WSRequest is the WebSocket request structure, made of a header, a
// parameter section and a payload.
// NOTE(review): presumably the Xunfei WebSocket request — confirm against
// XunfeiService.Synthesize.
type WSRequest struct {
	Header    WSHeader    `json:"header"`
	Parameter WSParameter `json:"parameter"`
	Payload   WSPayload   `json:"payload"`
}

WSRequest WebSocket请求结构

type WSTTS

// WSTTS holds the WebSocket TTS parameters (WSParameter.TTS). All fields are
// always serialized; none uses omitempty.
// NOTE(review): the meaning and valid range of the abbreviated fields
// (vcn, rhy, pybuffer, bgs, reg, rdn) are defined by the remote service and
// are not visible here — consult the provider's API reference.
type WSTTS struct {
	Vcn      string  `json:"vcn"`
	Volume   int     `json:"volume"`
	Rhy      int     `json:"rhy"`
	Pybuffer int     `json:"pybuffer"`
	Speed    int     `json:"speed"`
	Pitch    int     `json:"pitch"`
	Bgs      int     `json:"bgs"`
	Reg      int     `json:"reg"`
	Rdn      int     `json:"rdn"`
	Audio    WSAudio `json:"audio"`
}

WSTTS WebSocket TTS参数

type Word

// Word is one timing entry of a SentenceTimestamp: a word together with its
// start time, end time and confidence.
// NOTE(review): the unit of StartTime/EndTime is not visible here
// (presumably milliseconds) — confirm against the producer.
type Word struct {
	Confidence float64 `json:"confidence"`
	EndTime    int     `json:"end_time"`
	StartTime  int     `json:"start_time"`
	Word       string  `json:"word"`
}

type XunfeiService

type XunfeiService struct {
	// contains filtered or unexported fields
}

func NewXunfeiService

func NewXunfeiService(opt XunfeiTTSConfig) *XunfeiService

NewXunfeiService 创建讯飞TTS服务

func (*XunfeiService) CacheKey

func (xs *XunfeiService) CacheKey(text string) string

func (*XunfeiService) Close

func (xs *XunfeiService) Close() error

func (*XunfeiService) Format

func (xs *XunfeiService) Format() media.StreamFormat

func (*XunfeiService) Provider

func (xs *XunfeiService) Provider() TTSProvider

func (*XunfeiService) Synthesize

func (xs *XunfeiService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error

type XunfeiTTSConfig

// XunfeiTTSConfig is the configuration for the iFlytek (Xunfei) TTS provider.
//
// The `default` and `env` struct tags only record the intended default value
// and environment variable; the code that applies them is not part of this
// declaration.
type XunfeiTTSConfig struct {
	// Credentials; each may be supplied through the environment variable
	// named in its `env` tag.
	AppID         string `json:"app_id" yaml:"app_id" env:"XUNFEI_APP_ID"`
	APIKey        string `json:"api_key" yaml:"api_key" env:"XUNFEI_API_KEY"`
	APISecret     string `json:"api_secret" yaml:"api_secret" env:"XUNFEI_API_SECRET"`
	SampleRate    int    `json:"sample_rate" yaml:"sample_rate" default:"24000"`
	Channels      int    `json:"channels" yaml:"channels" default:"1"`
	BitDepth      int    `json:"bit_depth" yaml:"bit_depth" default:"16"`
	Codec         string `json:"codec" yaml:"codec" default:"raw"`
	FrameDuration string `json:"frame_duration" yaml:"frame_duration" default:"20ms"`
	// NOTE(review): Timeout is a bare integer; presumably seconds — confirm.
	Timeout       int    `json:"timeout" yaml:"timeout" default:"30"`
}

XunfeiTTSConfig 讯飞TTS配置

func NewXunfeiTTSConfig

func NewXunfeiTTSConfig(appID, apiKey, apiSecret string) XunfeiTTSConfig

NewXunfeiTTSConfig 创建讯飞TTS配置

func (*XunfeiTTSConfig) GetProvider

func (c *XunfeiTTSConfig) GetProvider() TTSProvider

GetProvider returns the TTS provider type

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL