voiceclone

package

v1.2.0 Latest Latest Go to latest Published: Jun 6, 2026 License: MIT Imports: 21 Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/LingByte/lingllm

Links

Open Source Insights

Documentation ¶

Index ¶

Constants
type AuthToken
type Config
type CreateTaskRequest
type CreateTaskResponse
type Factory
- func NewFactory() *Factory
- func (f *Factory) CreateService(config *Config) (VoiceCloneService, error)
- func (f *Factory) CreateServiceFromJSON(jsonConfig string) (VoiceCloneService, error)
- func (f *Factory) GetSupportedProviders() []Provider
- func (f *Factory) ValidateConfig(config *Config) error
type Provider
type SentenceTimestamp
type SubmitAudioRequest
type SynthesisHandler
type SynthesizeRequest
type SynthesizeResponse
type TaskStatus
type TextSegment
type TrainingStatus
type TrainingText
type VoiceCloneService
type VoiceCloneSynthesisService
- func NewVoiceCloneSynthesisService(cloneService VoiceCloneService, assetID string) *VoiceCloneSynthesisService
- func (v *VoiceCloneSynthesisService) CacheKey(text string) string
- func (v *VoiceCloneSynthesisService) Close() error
- func (v *VoiceCloneSynthesisService) Format() map[string]interface{}
- func (v *VoiceCloneSynthesisService) Provider() string
- func (v *VoiceCloneSynthesisService) Synthesize(ctx context.Context, handler interface{}, text string) error
type VolcAddition
type VolcengineCloneConfig
type VolcengineCloneService
- func NewVolcengineCloneService(config VolcengineCloneConfig) *VolcengineCloneService
- func (s *VolcengineCloneService) CreateTask(ctx context.Context, req *CreateTaskRequest) (*CreateTaskResponse, error)
- func (s *VolcengineCloneService) GetTrainingTexts(ctx context.Context, textID int64) (*TrainingText, error)
- func (s *VolcengineCloneService) Provider() Provider
- func (s *VolcengineCloneService) QueryTaskStatus(ctx context.Context, taskID string) (*TaskStatus, error)
- func (s *VolcengineCloneService) SubmitAudio(ctx context.Context, req *SubmitAudioRequest) error
- func (s *VolcengineCloneService) Synthesize(ctx context.Context, req *SynthesizeRequest) (*SynthesizeResponse, error)
- func (s *VolcengineCloneService) SynthesizeStream(ctx context.Context, req *SynthesizeRequest, handler SynthesisHandler) error
type VolcengineResponse
type VolcengineSentenceTimestamp
type Word
type XunfeiCloneConfig
type XunfeiCloneService
- func NewXunfeiCloneService(config XunfeiCloneConfig) *XunfeiCloneService
- func (s *XunfeiCloneService) CreateTask(ctx context.Context, req *CreateTaskRequest) (*CreateTaskResponse, error)
- func (s *XunfeiCloneService) GetTrainingTexts(ctx context.Context, textID int64) (*TrainingText, error)
- func (s *XunfeiCloneService) Provider() Provider
- func (s *XunfeiCloneService) QueryTaskStatus(ctx context.Context, taskID string) (*TaskStatus, error)
- func (s *XunfeiCloneService) SubmitAudio(ctx context.Context, req *SubmitAudioRequest) error
- func (s *XunfeiCloneService) Synthesize(ctx context.Context, req *SynthesizeRequest) (*SynthesizeResponse, error)
- func (s *XunfeiCloneService) SynthesizeStream(ctx context.Context, req *SynthesizeRequest, handler SynthesisHandler) error

Constants ¶

View Source

const (
	// VolcengineCloneCluster is the cluster name string "volcano_icl".
	// NOTE(review): VolcengineCloneConfig.Cluster documents the same string
	// as its default; confirm this constant is what supplies that default.
	VolcengineCloneCluster = "volcano_icl"
)

Variables ¶

This section is empty.

Functions ¶

This section is empty.

Types ¶

type AuthToken ¶

// AuthToken is an authentication token.
//
// All three fields are typed as strings and use lowercase JSON keys without
// separators ("accesstoken", "expiresin", "retcode").
type AuthToken struct {
	AccessToken string `json:"accesstoken"`
	ExpiresIn   string `json:"expiresin"`
	RetCode     string `json:"retcode"`
}

AuthToken 鉴权token

type Config ¶

// Config is the voice-clone configuration.
type Config struct {
	Provider Provider               `json:"provider"` // service provider
	Options  map[string]interface{} `json:"options"`  // provider-specific settings
}

Config 语音克隆配置

type CreateTaskRequest ¶

// CreateTaskRequest is the request for creating a training task.
type CreateTaskRequest struct {
	TaskName      string  `json:"task_name"`      // task name
	Sex           int     `json:"sex"`            // sex: 1 = male, 2 = female
	AgeGroup      int     `json:"age_group"`      // age group: 1 = child, 2 = youth, 3 = middle-aged, 4 = middle-aged to elderly
	Language      string  `json:"language"`       // language code, e.g. zh, en
	ResourceType  int     `json:"resource_type"`  // 12 = one-sentence cloning
	EngineVersion string  `json:"engine_version"` // pass omni_v1 for the multi-style edition; leave unset for the standard edition
	Denoise       int     `json:"denoise"`        // 0 = off, 1 = on
	MosRatio      float64 `json:"mos_ratio"`      // audio-check threshold; 0 disables the check
}

CreateTaskRequest 创建训练任务请求

type CreateTaskResponse ¶

// CreateTaskResponse is the response returned when a training task is created.
type CreateTaskResponse struct {
	TaskID string `json:"task_id"` // task ID
}

CreateTaskResponse 创建训练任务响应

type Factory ¶

// Factory is the voice-clone service factory. It is an empty struct and
// carries no state.
type Factory struct{}

Factory 语音克隆服务工厂

func NewFactory ¶

func NewFactory() *Factory

NewFactory 创建工厂实例

func (*Factory) CreateService ¶

func (f *Factory) CreateService(config *Config) (VoiceCloneService, error)

CreateService 根据配置创建语音克隆服务

func (*Factory) CreateServiceFromJSON ¶

func (f *Factory) CreateServiceFromJSON(jsonConfig string) (VoiceCloneService, error)

CreateServiceFromJSON 从JSON配置创建服务

func (*Factory) GetSupportedProviders ¶

func (f *Factory) GetSupportedProviders() []Provider

GetSupportedProviders 获取支持的提供商列表

func (*Factory) ValidateConfig ¶

func (f *Factory) ValidateConfig(config *Config) error

ValidateConfig 验证配置

type Provider ¶

// Provider identifies a voice-clone service provider.
type Provider string

Provider 语音克隆服务提供商

// Provider values declared by this package.
const (
	ProviderXunfei     Provider = "xunfei"     // iFlytek Spark
	ProviderVolcengine Provider = "volcengine" // Volcengine
)

type SentenceTimestamp ¶

// SentenceTimestamp is a sentence timestamp (compatible with the synthesis
// package).
type SentenceTimestamp struct {
	StartTime int64 `json:"start_time"` // start time (milliseconds)
	EndTime   int64 `json:"end_time"`   // end time (milliseconds)
}

SentenceTimestamp 句子时间戳（兼容 synthesis 包）

type SubmitAudioRequest ¶

// SubmitAudioRequest is the request for submitting audio.
//
// AudioFile is tagged `json:"-"`, so it is excluded from JSON encoding.
type SubmitAudioRequest struct {
	TaskID    string    `json:"task_id"`     // task ID
	TextID    int64     `json:"text_id"`     // training text ID
	TextSegID int64     `json:"text_seg_id"` // text segment ID
	AudioFile io.Reader `json:"-"`           // audio file
	Language  string    `json:"language"`    // language code
	MosRatio  float64   `json:"mos_ratio"`   // optional; > 0 enables the audio check
}

SubmitAudioRequest 提交音频请求

type SynthesisHandler ¶

// SynthesisHandler is the streaming-synthesis handler interface (compatible
// with the synthesis package).
type SynthesisHandler interface {
	// OnMessage receives a byte slice.
	// NOTE(review): presumably a chunk of synthesized audio — confirm against the implementations.
	OnMessage([]byte)
	// OnTimestamp receives a SentenceTimestamp.
	OnTimestamp(timestamp SentenceTimestamp)
}

SynthesisHandler 流式合成处理器接口（与 synthesis 包兼容）

type SynthesizeRequest ¶

// SynthesizeRequest is a synthesis request.
type SynthesizeRequest struct {
	AssetID    string `json:"asset_id"`    // voice ID obtained from training (assetId / res_id)
	Text       string `json:"text"`        // text to synthesize
	Language   string `json:"language"`    // language code, mapped to languageID
	LanguageID *int   `json:"language_id"` // 0 Chinese, 1 English, 2 Japanese, 3 Korean, 4 Russian, 5 French, 6 Arabic, 7 Spanish, 8 Cantonese
	Style      string `json:"style"`       // dialect/emotion for the multi-style edition; see the official style list
}

SynthesizeRequest 合成请求

type SynthesizeResponse ¶

// SynthesizeResponse is a synthesis response.
type SynthesizeResponse struct {
	AudioData  []byte  `json:"audio_data"`  // audio data
	Format     string  `json:"format"`      // audio format, e.g. pcm, wav, mp3
	SampleRate int     `json:"sample_rate"` // sample rate
	Duration   float64 `json:"duration"`    // audio duration (seconds)
}

SynthesizeResponse 合成响应

type TaskStatus ¶

// TaskStatus describes the state of a task.
type TaskStatus struct {
	TaskID     string         `json:"task_id"`     // task ID
	TaskName   string         `json:"task_name"`   // task name
	Status     TrainingStatus `json:"status"`      // training status
	AssetID    string         `json:"asset_id"`    // voice ID (returned after training succeeds)
	TrainVID   string         `json:"train_vid"`   // voice library ID
	FailedDesc string         `json:"failed_desc"` // failure reason
	Progress   float64        `json:"progress"`    // training progress, 0-100
	CreatedAt  time.Time      `json:"created_at"`  // creation time
	UpdatedAt  time.Time      `json:"updated_at"`  // last update time
}

TaskStatus 任务状态

type TextSegment ¶

// TextSegment is a text segment.
type TextSegment struct {
	SegID   interface{} `json:"seg_id"` // may be a string or a number
	SegText string      `json:"seg_text"`
}

TextSegment 文本段落

type TrainingStatus ¶

// TrainingStatus is the training status of a task.
type TrainingStatus int

TrainingStatus 训练状态

// TrainingStatus values.
//
// NOTE: TrainingStatusFailed is 0, so the zero value of TrainingStatus (and of
// an unset TaskStatus.Status) compares equal to TrainingStatusFailed.
const (
	TrainingStatusQueued     TrainingStatus = 2  // queued
	TrainingStatusInProgress TrainingStatus = -1 // training in progress
	TrainingStatusSuccess    TrainingStatus = 1  // succeeded
	TrainingStatusFailed     TrainingStatus = 0  // failed
)

type TrainingText ¶

// TrainingText is a training text made up of text segments.
type TrainingText struct {
	TextID   int64         `json:"text_id"`
	TextName string        `json:"text_name"`
	Segments []TextSegment `json:"segments"`
}

TrainingText 训练文本

type VoiceCloneService ¶

// VoiceCloneService is the voice-clone service interface.
type VoiceCloneService interface {
	// Provider returns the service provider name.
	Provider() Provider

	// GetTrainingTexts fetches the training text list.
	GetTrainingTexts(ctx context.Context, textID int64) (*TrainingText, error)

	// CreateTask creates a training task.
	CreateTask(ctx context.Context, req *CreateTaskRequest) (*CreateTaskResponse, error)

	// SubmitAudio submits an audio file.
	SubmitAudio(ctx context.Context, req *SubmitAudioRequest) error

	// QueryTaskStatus queries the status of a task.
	QueryTaskStatus(ctx context.Context, taskID string) (*TaskStatus, error)

	// Synthesize synthesizes speech with a trained voice (batch mode; returns the complete audio).
	Synthesize(ctx context.Context, req *SynthesizeRequest) (*SynthesizeResponse, error)

	// SynthesizeStream synthesizes speech with a trained voice in streaming mode (results are delivered through handler callbacks).
	SynthesizeStream(ctx context.Context, req *SynthesizeRequest, handler SynthesisHandler) error
}

VoiceCloneService 语音克隆服务接口

type VoiceCloneSynthesisService ¶

type VoiceCloneSynthesisService struct {
	// contains filtered or unexported fields
}

VoiceCloneSynthesisService 实现 synthesizer.SynthesisService 接口的适配器，用于在硬件通话中使用克隆音色。

func NewVoiceCloneSynthesisService ¶

func NewVoiceCloneSynthesisService(cloneService VoiceCloneService, assetID string) *VoiceCloneSynthesisService

NewVoiceCloneSynthesisService 创建适配器

func (*VoiceCloneSynthesisService) CacheKey ¶

func (v *VoiceCloneSynthesisService) CacheKey(text string) string

CacheKey 返回缓存键

func (*VoiceCloneSynthesisService) Close ¶

func (v *VoiceCloneSynthesisService) Close() error

Close 关闭服务

func (*VoiceCloneSynthesisService) Format ¶

func (v *VoiceCloneSynthesisService) Format() map[string]interface{}

Format 返回音频格式

func (*VoiceCloneSynthesisService) Provider ¶

func (v *VoiceCloneSynthesisService) Provider() string

Provider 返回提供商名称

func (*VoiceCloneSynthesisService) Synthesize ¶

func (v *VoiceCloneSynthesisService) Synthesize(ctx context.Context, handler interface{}, text string) error

Synthesize 实现 synthesizer.SynthesisService 接口

type VolcAddition ¶

// VolcAddition holds Volcengine additional information.
type VolcAddition struct {
	Frontend string `json:"frontend"`
}

VolcAddition 火山引擎附加信息

type VolcengineCloneConfig ¶

// VolcengineCloneConfig is the Volcengine voice-clone configuration.
//
// The defaults listed on the fields come from the original field comments;
// where they are applied is not visible in this declaration.
type VolcengineCloneConfig struct {
	AppID         string  `json:"app_id"`
	Token         string  `json:"token"`          // WebSocket authentication token (required)
	Cluster       string  `json:"cluster"`        // cluster name, default "volcano_icl"
	VoiceType     string  `json:"voice_type"`     // voice type (ID of the trained voice)
	Encoding      string  `json:"encoding"`       // encoding format, default "pcm"
	SampleRate    int     `json:"sample_rate"`    // sample rate, default 8000
	BitDepth      int     `json:"bit_depth"`      // bit depth, default 16
	Channels      int     `json:"channels"`       // number of channels, default 1
	FrameDuration string  `json:"frame_duration"` // frame duration, default "20ms"
	SpeedRatio    float64 `json:"speed_ratio"`    // speech speed ratio, default 1.0
	TrainingTimes int     `json:"training_times"` // number of training runs, default 1
}

VolcengineCloneConfig 火山引擎克隆配置

type VolcengineCloneService ¶

type VolcengineCloneService struct {
	// contains filtered or unexported fields
}

VolcengineCloneService 火山引擎语音克隆服务

func NewVolcengineCloneService ¶

func NewVolcengineCloneService(config VolcengineCloneConfig) *VolcengineCloneService

NewVolcengineCloneService 创建火山引擎克隆服务

func (*VolcengineCloneService) CreateTask ¶

func (s *VolcengineCloneService) CreateTask(ctx context.Context, req *CreateTaskRequest) (*CreateTaskResponse, error)

CreateTask 创建训练任务。注意：火山引擎的训练需要先在控制台创建 speaker_id，然后通过 SubmitAudio 上传音频。这里返回一个占位任务ID，实际训练通过 SubmitAudio 完成。

func (*VolcengineCloneService) GetTrainingTexts ¶

func (s *VolcengineCloneService) GetTrainingTexts(ctx context.Context, textID int64) (*TrainingText, error)

GetTrainingTexts 获取训练文本（火山引擎暂不支持，返回错误）

func (*VolcengineCloneService) Provider ¶

func (s *VolcengineCloneService) Provider() Provider

Provider 返回服务提供商

func (*VolcengineCloneService) QueryTaskStatus ¶

func (s *VolcengineCloneService) QueryTaskStatus(ctx context.Context, taskID string) (*TaskStatus, error)

QueryTaskStatus 查询任务状态。taskID 应该是 speaker_id。

func (*VolcengineCloneService) SubmitAudio ¶

func (s *VolcengineCloneService) SubmitAudio(ctx context.Context, req *SubmitAudioRequest) error

SubmitAudio 提交音频文件进行训练。speaker_id 需要从控制台获取，或通过 TaskID 参数传入（格式：speaker_id:xxx）。

func (*VolcengineCloneService) Synthesize ¶

func (s *VolcengineCloneService) Synthesize(ctx context.Context, req *SynthesizeRequest) (*SynthesizeResponse, error)

Synthesize 使用训练好的音色合成语音

func (*VolcengineCloneService) SynthesizeStream ¶

func (s *VolcengineCloneService) SynthesizeStream(ctx context.Context, req *SynthesizeRequest, handler SynthesisHandler) error

SynthesizeStream 流式合成语音

type VolcengineResponse ¶

// VolcengineResponse is the Volcengine WebSocket response structure.
//
// The fields carry no JSON tags. Timestamp is a pointer and may therefore
// be nil.
type VolcengineResponse struct {
	ProtocolVersion          int
	HeaderSize               int
	MessageType              int
	MessageTypeSpecificFlags int
	SerializationMethod      int
	MessageCompression       int
	Reserved                 int
	SequenceNumber           int
	PayloadSize              int
	Audio                    []byte
	IsLast                   bool
	ErrorCode                int
	ErrorMessage             string
	Timestamp                *VolcengineSentenceTimestamp
}

VolcengineResponse 火山引擎WebSocket响应结构

type VolcengineSentenceTimestamp ¶

// VolcengineSentenceTimestamp is the Volcengine sentence timestamp (for
// internal use).
type VolcengineSentenceTimestamp struct {
	Words []Word `json:"words"`
}

VolcengineSentenceTimestamp 火山引擎句子时间戳（内部使用）

type Word ¶

// Word is a per-word timestamp.
// NOTE(review): the unit of StartTime/EndTime is not stated here — confirm
// against the provider's response format.
type Word struct {
	Confidence float64 `json:"confidence"`
	EndTime    int     `json:"end_time"`
	StartTime  int     `json:"start_time"`
	Word       string  `json:"word"`
}

Word 单词时间戳

type XunfeiCloneConfig ¶

// XunfeiCloneConfig is the iFlytek (Xunfei) one-sentence cloning configuration
// (standard edition / multi-style edition omni_v1).
type XunfeiCloneConfig struct {
	AppID              string `json:"app_id"`
	APIKey             string `json:"api_key"`
	BaseURL            string `json:"base_url"`
	Timeout            int    `json:"timeout"`
	EngineVersion      string `json:"engine_version"` // multi-style edition: omni_v1
	VCN                string `json:"vcn"`            // synthesis vcn: x6_clone for multi-style, x5_clone for standard
	WebSocketAppID     string `json:"ws_app_id"`
	WebSocketAPIKey    string `json:"ws_api_key"`
	WebSocketAPISecret string `json:"ws_api_secret"`
}

XunfeiCloneConfig 讯飞一句话复刻配置（标准版 / 多风格版 omni_v1）

type XunfeiCloneService ¶

type XunfeiCloneService struct {
	// contains filtered or unexported fields
}

XunfeiCloneService 讯飞语音克隆服务

func NewXunfeiCloneService ¶

func NewXunfeiCloneService(config XunfeiCloneConfig) *XunfeiCloneService

NewXunfeiCloneService 创建讯飞克隆服务。讯飞语音克隆服务实现。

func (*XunfeiCloneService) CreateTask ¶

func (s *XunfeiCloneService) CreateTask(ctx context.Context, req *CreateTaskRequest) (*CreateTaskResponse, error)

CreateTask 创建训练任务（task/add，resourceType=12 一句话复刻）

func (*XunfeiCloneService) GetTrainingTexts ¶

func (s *XunfeiCloneService) GetTrainingTexts(ctx context.Context, textID int64) (*TrainingText, error)

GetTrainingTexts 获取训练文本。讯飞语音克隆服务实现。

func (*XunfeiCloneService) Provider ¶

func (s *XunfeiCloneService) Provider() Provider

Provider 返回服务提供商

func (*XunfeiCloneService) QueryTaskStatus ¶

func (s *XunfeiCloneService) QueryTaskStatus(ctx context.Context, taskID string) (*TaskStatus, error)

QueryTaskStatus 查询任务状态。讯飞语音克隆服务实现。

func (*XunfeiCloneService) SubmitAudio ¶

func (s *XunfeiCloneService) SubmitAudio(ctx context.Context, req *SubmitAudioRequest) error

SubmitAudio 提交音频文件

func (*XunfeiCloneService) Synthesize ¶

func (s *XunfeiCloneService) Synthesize(ctx context.Context, req *SynthesizeRequest) (*SynthesizeResponse, error)

Synthesize 使用训练好的音色合成语音

func (*XunfeiCloneService) SynthesizeStream ¶

func (s *XunfeiCloneService) SynthesizeStream(ctx context.Context, req *SynthesizeRequest, handler SynthesisHandler) error

SynthesizeStream 流式合成语音

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL