Documentation
¶
Index ¶
- Constants
- type AuthToken
- type Config
- type CreateTaskRequest
- type CreateTaskResponse
- type Factory
- type Provider
- type SentenceTimestamp
- type SubmitAudioRequest
- type SynthesisHandler
- type SynthesizeRequest
- type SynthesizeResponse
- type TaskStatus
- type TextSegment
- type TrainingStatus
- type TrainingText
- type VoiceCloneService
- type VoiceCloneSynthesisService
- func (v *VoiceCloneSynthesisService) CacheKey(text string) string
- func (v *VoiceCloneSynthesisService) Close() error
- func (v *VoiceCloneSynthesisService) Format() map[string]interface{}
- func (v *VoiceCloneSynthesisService) Provider() string
- func (v *VoiceCloneSynthesisService) Synthesize(ctx context.Context, handler interface{}, text string) error
- type VolcAddition
- type VolcengineCloneConfig
- type VolcengineCloneService
- func (s *VolcengineCloneService) CreateTask(ctx context.Context, req *CreateTaskRequest) (*CreateTaskResponse, error)
- func (s *VolcengineCloneService) GetTrainingTexts(ctx context.Context, textID int64) (*TrainingText, error)
- func (s *VolcengineCloneService) Provider() Provider
- func (s *VolcengineCloneService) QueryTaskStatus(ctx context.Context, taskID string) (*TaskStatus, error)
- func (s *VolcengineCloneService) SubmitAudio(ctx context.Context, req *SubmitAudioRequest) error
- func (s *VolcengineCloneService) Synthesize(ctx context.Context, req *SynthesizeRequest) (*SynthesizeResponse, error)
- func (s *VolcengineCloneService) SynthesizeStream(ctx context.Context, req *SynthesizeRequest, handler SynthesisHandler) error
- type VolcengineResponse
- type VolcengineSentenceTimestamp
- type Word
- type XunfeiCloneConfig
- type XunfeiCloneService
- func (s *XunfeiCloneService) CreateTask(ctx context.Context, req *CreateTaskRequest) (*CreateTaskResponse, error)
- func (s *XunfeiCloneService) GetTrainingTexts(ctx context.Context, textID int64) (*TrainingText, error)
- func (s *XunfeiCloneService) Provider() Provider
- func (s *XunfeiCloneService) QueryTaskStatus(ctx context.Context, taskID string) (*TaskStatus, error)
- func (s *XunfeiCloneService) SubmitAudio(ctx context.Context, req *SubmitAudioRequest) error
- func (s *XunfeiCloneService) Synthesize(ctx context.Context, req *SynthesizeRequest) (*SynthesizeResponse, error)
- func (s *XunfeiCloneService) SynthesizeStream(ctx context.Context, req *SynthesizeRequest, handler SynthesisHandler) error
Constants ¶
const (
// VolcengineCloneCluster is a Volcengine cluster name. VolcengineCloneConfig.Cluster
// documents the same string, "volcano_icl", as its default.
VolcengineCloneCluster = "volcano_icl"
)
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type AuthToken ¶
// AuthToken is an authentication token.
// NOTE(review): which provider endpoint produces this payload is not visible here — confirm.
type AuthToken struct {
// AccessToken maps to the "accesstoken" JSON key.
AccessToken string `json:"accesstoken"`
// ExpiresIn maps to the "expiresin" JSON key. It is carried as a string; its unit is not shown here.
ExpiresIn string `json:"expiresin"`
// RetCode maps to the "retcode" JSON key. It is carried as a string; its value set is not shown here.
RetCode string `json:"retcode"`
}
AuthToken 鉴权token
type Config ¶
// Config is the voice-clone configuration.
type Config struct {
Provider Provider `json:"provider"` // service provider
Options map[string]interface{} `json:"options"` // provider-specific settings
}
Config 语音克隆配置
type CreateTaskRequest ¶
// CreateTaskRequest is the request for creating a training task.
type CreateTaskRequest struct {
TaskName string `json:"task_name"` // task name
Sex int `json:"sex"` // gender: 1 = male, 2 = female
AgeGroup int `json:"age_group"` // age group: 1 = child, 2 = young adult, 3 = middle-aged, 4 = middle-aged to elderly
Language string `json:"language"` // language code, e.g. zh, en
ResourceType int `json:"resource_type"` // 12 = one-sentence voice cloning
EngineVersion string `json:"engine_version"` // pass omni_v1 for the multi-style edition; leave unset for the standard edition
Denoise int `json:"denoise"` // 0 = off, 1 = on
MosRatio float64 `json:"mos_ratio"` // audio-check threshold; 0 disables the check
}
CreateTaskRequest 创建训练任务请求
type CreateTaskResponse ¶
// CreateTaskResponse is the response to creating a training task.
type CreateTaskResponse struct {
TaskID string `json:"task_id"` // task ID
}
CreateTaskResponse 创建训练任务响应
type Factory ¶
// Factory is the factory for voice-clone services. It has no fields.
type Factory struct{}
Factory 语音克隆服务工厂
func (*Factory) CreateService ¶
func (f *Factory) CreateService(config *Config) (VoiceCloneService, error)
CreateService 根据配置创建语音克隆服务
func (*Factory) CreateServiceFromJSON ¶
func (f *Factory) CreateServiceFromJSON(jsonConfig string) (VoiceCloneService, error)
CreateServiceFromJSON 从JSON配置创建服务
func (*Factory) GetSupportedProviders ¶
GetSupportedProviders 获取支持的提供商列表
func (*Factory) ValidateConfig ¶
ValidateConfig 验证配置
type SentenceTimestamp ¶
// SentenceTimestamp is a sentence timestamp (compatible with the synthesis package).
type SentenceTimestamp struct {
StartTime int64 `json:"start_time"` // start time, in milliseconds
EndTime int64 `json:"end_time"` // end time, in milliseconds
}
SentenceTimestamp 句子时间戳(兼容 synthesis 包)
type SubmitAudioRequest ¶
// SubmitAudioRequest is the request for submitting audio.
type SubmitAudioRequest struct {
TaskID string `json:"task_id"` // task ID
TextID int64 `json:"text_id"` // training text ID
TextSegID int64 `json:"text_seg_id"` // text segment ID
AudioFile io.Reader `json:"-"` // audio file; the "-" tag keeps it out of JSON
Language string `json:"language"` // language code
MosRatio float64 `json:"mos_ratio"` // optional; a value > 0 enables the audio check
}
SubmitAudioRequest 提交音频请求
type SynthesisHandler ¶
// SynthesisHandler is the handler interface for streaming synthesis
// (compatible with the synthesis package).
type SynthesisHandler interface {
// OnMessage receives a byte slice.
// NOTE(review): presumably a chunk of synthesized audio — confirm against the implementations.
OnMessage([]byte)
// OnTimestamp receives a SentenceTimestamp.
OnTimestamp(timestamp SentenceTimestamp)
}
SynthesisHandler 流式合成处理器接口(与 synthesis 包兼容)
type SynthesizeRequest ¶
// SynthesizeRequest is a synthesis request.
type SynthesizeRequest struct {
AssetID string `json:"asset_id"` // voice ID produced by training (assetId / res_id)
Text string `json:"text"` // text to synthesize
Language string `json:"language"` // language code, mapped to languageID
LanguageID *int `json:"language_id"` // 0 Chinese, 1 English, 2 Japanese, 3 Korean, 4 Russian, 5 French, 6 Arabic, 7 Spanish, 8 Cantonese
Style string `json:"style"` // dialect/emotion for the multi-style edition; see the official style list
}
SynthesizeRequest 合成请求
type SynthesizeResponse ¶
// SynthesizeResponse is a synthesis response.
type SynthesizeResponse struct {
AudioData []byte `json:"audio_data"` // audio data
Format string `json:"format"` // audio format, e.g. pcm, wav, mp3
SampleRate int `json:"sample_rate"` // sample rate
Duration float64 `json:"duration"` // audio duration, in seconds
}
SynthesizeResponse 合成响应
type TaskStatus ¶
// TaskStatus is the status of a task.
type TaskStatus struct {
TaskID string `json:"task_id"` // task ID
TaskName string `json:"task_name"` // task name
Status TrainingStatus `json:"status"` // training status
AssetID string `json:"asset_id"` // voice ID (returned once training succeeds)
TrainVID string `json:"train_vid"` // voice library ID
FailedDesc string `json:"failed_desc"` // failure reason
Progress float64 `json:"progress"` // training progress, 0-100
CreatedAt time.Time `json:"created_at"` // creation time
UpdatedAt time.Time `json:"updated_at"` // last update time
}
TaskStatus 任务状态
type TextSegment ¶
// TextSegment is a text segment.
type TextSegment struct {
SegID interface{} `json:"seg_id"` // may be a string or a number
// SegText maps to the "seg_text" JSON key.
SegText string `json:"seg_text"`
}
TextSegment 文本段落
type TrainingStatus ¶
// TrainingStatus is the training status of a task.
type TrainingStatus int

// Training status values. The numeric values are not sequential, so they are
// spelled out explicitly rather than generated with iota.
const (
	TrainingStatusQueued     TrainingStatus = 2  // queued
	TrainingStatusInProgress TrainingStatus = -1 // training in progress
	TrainingStatusSuccess    TrainingStatus = 1  // succeeded
	TrainingStatusFailed     TrainingStatus = 0  // failed
)
type TrainingText ¶
// TrainingText is a training text.
type TrainingText struct {
// TextID maps to the "text_id" JSON key.
TextID int64 `json:"text_id"`
// TextName maps to the "text_name" JSON key.
TextName string `json:"text_name"`
// Segments holds the text's TextSegment entries.
Segments []TextSegment `json:"segments"`
}
TrainingText 训练文本
type VoiceCloneService ¶
// VoiceCloneService is the voice-clone service interface.
type VoiceCloneService interface {
// Provider returns the service provider name.
Provider() Provider
// GetTrainingTexts fetches the training texts.
GetTrainingTexts(ctx context.Context, textID int64) (*TrainingText, error)
// CreateTask creates a training task.
CreateTask(ctx context.Context, req *CreateTaskRequest) (*CreateTaskResponse, error)
// SubmitAudio submits an audio file.
SubmitAudio(ctx context.Context, req *SubmitAudioRequest) error
// QueryTaskStatus queries the status of a task.
QueryTaskStatus(ctx context.Context, taskID string) (*TaskStatus, error)
// Synthesize synthesizes speech with a trained voice (batch mode; returns the complete audio).
Synthesize(ctx context.Context, req *SynthesizeRequest) (*SynthesizeResponse, error)
// SynthesizeStream synthesizes speech with a trained voice in streaming mode, delivering results through handler callbacks.
SynthesizeStream(ctx context.Context, req *SynthesizeRequest, handler SynthesisHandler) error
}
VoiceCloneService 语音克隆服务接口
type VoiceCloneSynthesisService ¶
// VoiceCloneSynthesisService is an adapter that implements the
// synthesizer.SynthesisService interface, so that a cloned voice can be used
// in hardware calls.
type VoiceCloneSynthesisService struct {
// contains filtered or unexported fields
}
VoiceCloneSynthesisService 实现 synthesizer.SynthesisService 接口的适配器,用于在硬件通话中使用克隆音色。
func NewVoiceCloneSynthesisService ¶
func NewVoiceCloneSynthesisService(cloneService VoiceCloneService, assetID string) *VoiceCloneSynthesisService
NewVoiceCloneSynthesisService 创建适配器
func (*VoiceCloneSynthesisService) CacheKey ¶
func (v *VoiceCloneSynthesisService) CacheKey(text string) string
CacheKey 返回缓存键
func (*VoiceCloneSynthesisService) Close ¶
func (v *VoiceCloneSynthesisService) Close() error
Close 关闭服务
func (*VoiceCloneSynthesisService) Format ¶
func (v *VoiceCloneSynthesisService) Format() map[string]interface{}
Format 返回音频格式
func (*VoiceCloneSynthesisService) Provider ¶
func (v *VoiceCloneSynthesisService) Provider() string
Provider 返回提供商名称
func (*VoiceCloneSynthesisService) Synthesize ¶
func (v *VoiceCloneSynthesisService) Synthesize(ctx context.Context, handler interface{}, text string) error
Synthesize 实现 synthesizer.SynthesisService 接口
type VolcAddition ¶
// VolcAddition is Volcengine additional information.
type VolcAddition struct {
// Frontend maps to the "frontend" JSON key. It is a string; its contents are not shown here.
Frontend string `json:"frontend"`
}
VolcAddition 火山引擎附加信息
type VolcengineCloneConfig ¶
// VolcengineCloneConfig is the Volcengine voice-clone configuration.
type VolcengineCloneConfig struct {
AppID string `json:"app_id"`
Token string `json:"token"` // WebSocket authentication token (required)
Cluster string `json:"cluster"` // cluster name, default "volcano_icl"
VoiceType string `json:"voice_type"` // voice type (ID of the trained voice)
Encoding string `json:"encoding"` // encoding format, default "pcm"
SampleRate int `json:"sample_rate"` // sample rate, default 8000
BitDepth int `json:"bit_depth"` // bit depth, default 16
Channels int `json:"channels"` // number of channels, default 1
FrameDuration string `json:"frame_duration"` // frame duration, default "20ms"
SpeedRatio float64 `json:"speed_ratio"` // speech-rate ratio, default 1.0
TrainingTimes int `json:"training_times"` // number of training runs, default 1
}
VolcengineCloneConfig 火山引擎克隆配置
type VolcengineCloneService ¶
// VolcengineCloneService is the Volcengine voice-clone service.
type VolcengineCloneService struct {
// contains filtered or unexported fields
}
VolcengineCloneService 火山引擎语音克隆服务
func NewVolcengineCloneService ¶
func NewVolcengineCloneService(config VolcengineCloneConfig) *VolcengineCloneService
NewVolcengineCloneService 创建火山引擎克隆服务
func (*VolcengineCloneService) CreateTask ¶
func (s *VolcengineCloneService) CreateTask(ctx context.Context, req *CreateTaskRequest) (*CreateTaskResponse, error)
CreateTask 创建训练任务。注意:火山引擎的训练需要先在控制台创建 speaker_id,然后通过 SubmitAudio 上传音频;这里返回一个占位任务ID,实际训练通过 SubmitAudio 完成。
func (*VolcengineCloneService) GetTrainingTexts ¶
func (s *VolcengineCloneService) GetTrainingTexts(ctx context.Context, textID int64) (*TrainingText, error)
GetTrainingTexts 获取训练文本(火山引擎暂不支持,返回错误)
func (*VolcengineCloneService) Provider ¶
func (s *VolcengineCloneService) Provider() Provider
Provider 返回服务提供商
func (*VolcengineCloneService) QueryTaskStatus ¶
func (s *VolcengineCloneService) QueryTaskStatus(ctx context.Context, taskID string) (*TaskStatus, error)
QueryTaskStatus 查询任务状态。taskID 应该是 speaker_id。
func (*VolcengineCloneService) SubmitAudio ¶
func (s *VolcengineCloneService) SubmitAudio(ctx context.Context, req *SubmitAudioRequest) error
SubmitAudio 提交音频文件进行训练。speaker_id 需要从控制台获取,或通过 TaskID 参数传入(格式:speaker_id:xxx)。
func (*VolcengineCloneService) Synthesize ¶
func (s *VolcengineCloneService) Synthesize(ctx context.Context, req *SynthesizeRequest) (*SynthesizeResponse, error)
Synthesize 使用训练好的音色合成语音
func (*VolcengineCloneService) SynthesizeStream ¶
func (s *VolcengineCloneService) SynthesizeStream(ctx context.Context, req *SynthesizeRequest, handler SynthesisHandler) error
SynthesizeStream 流式合成语音
type VolcengineResponse ¶
// VolcengineResponse is the Volcengine WebSocket response structure.
// None of its fields carry JSON tags.
// NOTE(review): the fields from ProtocolVersion through PayloadSize look like
// decoded frame-header values — confirm against the decoder.
type VolcengineResponse struct {
ProtocolVersion int
HeaderSize int
MessageType int
MessageTypeSpecificFlags int
SerializationMethod int
MessageCompression int
Reserved int
SequenceNumber int
PayloadSize int
Audio []byte
IsLast bool
ErrorCode int
ErrorMessage string
// Timestamp is a pointer to a VolcengineSentenceTimestamp.
// NOTE(review): presumably nil when a response carries no timing data — confirm.
Timestamp *VolcengineSentenceTimestamp
}
VolcengineResponse 火山引擎WebSocket响应结构
type VolcengineSentenceTimestamp ¶
// VolcengineSentenceTimestamp is the Volcengine sentence timestamp (for internal use).
type VolcengineSentenceTimestamp struct {
// Words holds the Word entries, under the "words" JSON key.
Words []Word `json:"words"`
}
VolcengineSentenceTimestamp 火山引擎句子时间戳(内部使用)
type Word ¶
// Word is a word-level timestamp.
// NOTE(review): the unit of StartTime/EndTime is not stated here; SentenceTimestamp
// uses milliseconds — confirm whether the same applies.
type Word struct {
Confidence float64 `json:"confidence"`
EndTime int `json:"end_time"`
StartTime int `json:"start_time"`
Word string `json:"word"`
}
Word 单词时间戳
type XunfeiCloneConfig ¶
// XunfeiCloneConfig is the Xunfei one-sentence voice-cloning configuration
// (standard edition / multi-style edition omni_v1).
type XunfeiCloneConfig struct {
AppID string `json:"app_id"`
APIKey string `json:"api_key"`
BaseURL string `json:"base_url"`
// NOTE(review): the unit of Timeout is not shown here — confirm at the point of use.
Timeout int `json:"timeout"`
EngineVersion string `json:"engine_version"` // multi-style edition: omni_v1
VCN string `json:"vcn"` // synthesis vcn: x6_clone for multi-style, x5_clone for standard
WebSocketAppID string `json:"ws_app_id"`
WebSocketAPIKey string `json:"ws_api_key"`
WebSocketAPISecret string `json:"ws_api_secret"`
}
XunfeiCloneConfig 讯飞一句话复刻配置(标准版 / 多风格版 omni_v1)
type XunfeiCloneService ¶
// XunfeiCloneService is the Xunfei voice-clone service.
type XunfeiCloneService struct {
// contains filtered or unexported fields
}
XunfeiCloneService 讯飞语音克隆服务
func NewXunfeiCloneService ¶
func NewXunfeiCloneService(config XunfeiCloneConfig) *XunfeiCloneService
NewXunfeiCloneService 创建讯飞克隆服务(讯飞语音克隆服务实现)。
func (*XunfeiCloneService) CreateTask ¶
func (s *XunfeiCloneService) CreateTask(ctx context.Context, req *CreateTaskRequest) (*CreateTaskResponse, error)
CreateTask 创建训练任务(task/add,resourceType=12 一句话复刻)
func (*XunfeiCloneService) GetTrainingTexts ¶
func (s *XunfeiCloneService) GetTrainingTexts(ctx context.Context, textID int64) (*TrainingText, error)
GetTrainingTexts 获取训练文本(讯飞语音克隆服务实现)。
func (*XunfeiCloneService) Provider ¶
func (s *XunfeiCloneService) Provider() Provider
Provider 返回服务提供商
func (*XunfeiCloneService) QueryTaskStatus ¶
func (s *XunfeiCloneService) QueryTaskStatus(ctx context.Context, taskID string) (*TaskStatus, error)
QueryTaskStatus 查询任务状态(讯飞语音克隆服务实现)。
func (*XunfeiCloneService) SubmitAudio ¶
func (s *XunfeiCloneService) SubmitAudio(ctx context.Context, req *SubmitAudioRequest) error
SubmitAudio 提交音频文件
func (*XunfeiCloneService) Synthesize ¶
func (s *XunfeiCloneService) Synthesize(ctx context.Context, req *SynthesizeRequest) (*SynthesizeResponse, error)
Synthesize 使用训练好的音色合成语音
func (*XunfeiCloneService) SynthesizeStream ¶
func (s *XunfeiCloneService) SynthesizeStream(ctx context.Context, req *SynthesizeRequest, handler SynthesisHandler) error
SynthesizeStream 流式合成语音