speech

package

v1.3.0 Latest Latest Go to latest Published: Feb 26, 2026 License: MIT Imports: 12 Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/BaSui01/agentflow

Links

Open Source Insights

Documentation ¶

Index ¶

type DeepgramConfig
- func DefaultDeepgramConfig() DeepgramConfig
type DeepgramProvider
- func NewDeepgramProvider(cfg DeepgramConfig) *DeepgramProvider
- func (p *DeepgramProvider) Name() string
- func (p *DeepgramProvider) SupportedFormats() []string
- func (p *DeepgramProvider) Transcribe(ctx context.Context, req *STTRequest) (*STTResponse, error)
- func (p *DeepgramProvider) TranscribeFile(ctx context.Context, filepath string, opts *STTRequest) (*STTResponse, error)
type ElevenLabsConfig
- func DefaultElevenLabsConfig() ElevenLabsConfig
type ElevenLabsProvider
- func NewElevenLabsProvider(cfg ElevenLabsConfig) *ElevenLabsProvider
- func (p *ElevenLabsProvider) ListVoices(ctx context.Context) ([]Voice, error)
- func (p *ElevenLabsProvider) Name() string
- func (p *ElevenLabsProvider) Synthesize(ctx context.Context, req *TTSRequest) (*TTSResponse, error)
- func (p *ElevenLabsProvider) SynthesizeToFile(ctx context.Context, req *TTSRequest, filepath string) error
type OpenAISTTConfig
- func DefaultOpenAISTTConfig() OpenAISTTConfig
type OpenAISTTProvider
- func NewOpenAISTTProvider(cfg OpenAISTTConfig) *OpenAISTTProvider
- func (p *OpenAISTTProvider) Name() string
- func (p *OpenAISTTProvider) SupportedFormats() []string
- func (p *OpenAISTTProvider) Transcribe(ctx context.Context, req *STTRequest) (*STTResponse, error)
- func (p *OpenAISTTProvider) TranscribeFile(ctx context.Context, filepath string, opts *STTRequest) (*STTResponse, error)
type OpenAITTSConfig
- func DefaultOpenAITTSConfig() OpenAITTSConfig
type OpenAITTSProvider
- func NewOpenAITTSProvider(cfg OpenAITTSConfig) *OpenAITTSProvider
- func (p *OpenAITTSProvider) ListVoices(ctx context.Context) ([]Voice, error)
- func (p *OpenAITTSProvider) Name() string
- func (p *OpenAITTSProvider) Synthesize(ctx context.Context, req *TTSRequest) (*TTSResponse, error)
- func (p *OpenAITTSProvider) SynthesizeToFile(ctx context.Context, req *TTSRequest, filepath string) error
type STTProvider
type STTRequest
type STTResponse
type Segment
type TTSProvider
type TTSRequest
type TTSResponse
type Voice
type Word

Constants ¶

This section is empty.

Variables ¶

This section is empty.

Functions ¶

This section is empty.

Types ¶

type DeepgramConfig ¶

// DeepgramConfig holds the settings for the Deepgram speech-to-text (STT)
// provider. Every field carries matching json and yaml keys: APIKey and
// BaseURL have no omitempty option, while Model and Timeout are tagged
// omitempty.
type DeepgramConfig struct {
	APIKey  string        `json:"api_key" yaml:"api_key"`
	BaseURL string        `json:"base_url" yaml:"base_url"`
	Model   string        `json:"model,omitempty" yaml:"model,omitempty"` // nova-2
	Timeout time.Duration `json:"timeout,omitempty" yaml:"timeout,omitempty"`
}

DeepgramConfig 配置 Deepgram STT 提供者.

func DefaultDeepgramConfig ¶

func DefaultDeepgramConfig() DeepgramConfig

DefaultDeepgramConfig 返回默认的 Deepgram 配置。

type DeepgramProvider ¶

// DeepgramProvider performs speech-to-text (STT) through the Deepgram API.
// Its fields are unexported and therefore not shown in this listing.
type DeepgramProvider struct {
	// contains filtered or unexported fields
}

DeepgramProvider使用Deepgram API执行STT.

func NewDeepgramProvider ¶

func NewDeepgramProvider(cfg DeepgramConfig) *DeepgramProvider

NewDeepgramProvider 创建新的 Deepgram STT 提供者.

func (*DeepgramProvider) Name ¶

func (p *DeepgramProvider) Name() string

func (*DeepgramProvider) SupportedFormats ¶

func (p *DeepgramProvider) SupportedFormats() []string

func (*DeepgramProvider) Transcribe ¶

func (p *DeepgramProvider) Transcribe(ctx context.Context, req *STTRequest) (*STTResponse, error)

Transcribe 使用 Deepgram 将语音转换为文本。

func (*DeepgramProvider) TranscribeFile ¶

func (p *DeepgramProvider) TranscribeFile(ctx context.Context, filepath string, opts *STTRequest) (*STTResponse, error)

TranscribeFile 转录音频文件.

type ElevenLabsConfig ¶

// ElevenLabsConfig holds the settings for the ElevenLabs text-to-speech (TTS)
// provider. Every field carries matching json and yaml keys: APIKey and
// BaseURL have no omitempty option, while Model, VoiceID and Timeout are
// tagged omitempty.
type ElevenLabsConfig struct {
	APIKey  string        `json:"api_key" yaml:"api_key"`
	BaseURL string        `json:"base_url" yaml:"base_url"`
	Model   string        `json:"model,omitempty" yaml:"model,omitempty"` // eleven_multilingual_v2
	VoiceID string        `json:"voice_id,omitempty" yaml:"voice_id,omitempty"`
	Timeout time.Duration `json:"timeout,omitempty" yaml:"timeout,omitempty"`
}

ElevenLabsConfig 配置 ElevenLabs TTS 提供者.

func DefaultElevenLabsConfig ¶

func DefaultElevenLabsConfig() ElevenLabsConfig

DefaultElevenLabsConfig 返回默认的 ElevenLabs 配置。

type ElevenLabsProvider ¶

// ElevenLabsProvider performs text-to-speech (TTS) through the ElevenLabs API.
// Its fields are unexported and therefore not shown in this listing.
type ElevenLabsProvider struct {
	// contains filtered or unexported fields
}

ElevenLabsProvider 使用 ElevenLabs API 执行 TTS.

func NewElevenLabsProvider ¶

func NewElevenLabsProvider(cfg ElevenLabsConfig) *ElevenLabsProvider

NewElevenLabsProvider 创建新的 ElevenLabs TTS 提供者.

func (*ElevenLabsProvider) ListVoices ¶

func (p *ElevenLabsProvider) ListVoices(ctx context.Context) ([]Voice, error)

ListVoices 返回可用的 ElevenLabs 声音。

func (*ElevenLabsProvider) Name ¶

func (p *ElevenLabsProvider) Name() string

func (*ElevenLabsProvider) Synthesize ¶

func (p *ElevenLabsProvider) Synthesize(ctx context.Context, req *TTSRequest) (*TTSResponse, error)

Synthesize 使用 ElevenLabs 将文本转换为语音.

func (*ElevenLabsProvider) SynthesizeToFile ¶

func (p *ElevenLabsProvider) SynthesizeToFile(ctx context.Context, req *TTSRequest, filepath string) error

将文本转换为语音并保存为文件。

type OpenAISTTConfig ¶

// OpenAISTTConfig holds the settings for the OpenAI Whisper speech-to-text
// (STT) provider. Every field carries matching json and yaml keys: APIKey and
// BaseURL have no omitempty option, while Model and Timeout are tagged
// omitempty.
type OpenAISTTConfig struct {
	APIKey  string        `json:"api_key" yaml:"api_key"`
	BaseURL string        `json:"base_url" yaml:"base_url"`
	Model   string        `json:"model,omitempty" yaml:"model,omitempty"` // whisper-1
	Timeout time.Duration `json:"timeout,omitempty" yaml:"timeout,omitempty"`
}

OpenAISTTConfig 配置 OpenAI Whisper STT 提供者.

func DefaultOpenAISTTConfig ¶

func DefaultOpenAISTTConfig() OpenAISTTConfig

DefaultOpenAISTTConfig 返回默认的 OpenAI STT 配置。

type OpenAISTTProvider ¶

// OpenAISTTProvider performs speech-to-text (STT) through the OpenAI Whisper
// API. Its fields are unexported and therefore not shown in this listing.
type OpenAISTTProvider struct {
	// contains filtered or unexported fields
}

OpenAISTTProvider使用OpenAI Whisper API执行STT.

func NewOpenAISTTProvider ¶

func NewOpenAISTTProvider(cfg OpenAISTTConfig) *OpenAISTTProvider

NewOpenAISTTProvider 创建新的 OpenAI STT 提供者.

func (*OpenAISTTProvider) Name ¶

func (p *OpenAISTTProvider) Name() string

func (*OpenAISTTProvider) SupportedFormats ¶

func (p *OpenAISTTProvider) SupportedFormats() []string

func (*OpenAISTTProvider) Transcribe ¶

func (p *OpenAISTTProvider) Transcribe(ctx context.Context, req *STTRequest) (*STTResponse, error)

将语音转换为文本。

func (*OpenAISTTProvider) TranscribeFile ¶

func (p *OpenAISTTProvider) TranscribeFile(ctx context.Context, filepath string, opts *STTRequest) (*STTResponse, error)

TranscribeFile 转录音频文件.

type OpenAITTSConfig ¶

// OpenAITTSConfig holds the settings for the OpenAI text-to-speech (TTS)
// provider. Every field carries matching json and yaml keys: APIKey and
// BaseURL have no omitempty option, while Model, Voice and Timeout are tagged
// omitempty.
type OpenAITTSConfig struct {
	APIKey  string        `json:"api_key" yaml:"api_key"`
	BaseURL string        `json:"base_url" yaml:"base_url"`
	Model   string        `json:"model,omitempty" yaml:"model,omitempty"` // tts-1, tts-1-hd
	Voice   string        `json:"voice,omitempty" yaml:"voice,omitempty"` // alloy, echo, fable, onyx, nova, shimmer
	Timeout time.Duration `json:"timeout,omitempty" yaml:"timeout,omitempty"`
}

OpenAITTSConfig 配置 OpenAI TTS 提供者.

func DefaultOpenAITTSConfig ¶

func DefaultOpenAITTSConfig() OpenAITTSConfig

DefaultOpenAITTSConfig 返回默认的 OpenAI TTS 配置。

type OpenAITTSProvider ¶

// OpenAITTSProvider performs text-to-speech (TTS) through the OpenAI API.
// Its fields are unexported and therefore not shown in this listing.
type OpenAITTSProvider struct {
	// contains filtered or unexported fields
}

OpenAITTSProvider 使用 OpenAI 的 API 执行 TTS.

func NewOpenAITTSProvider ¶

func NewOpenAITTSProvider(cfg OpenAITTSConfig) *OpenAITTSProvider

NewOpenAITTSProvider 创建新的 OpenAI TTS 提供者.

func (*OpenAITTSProvider) ListVoices ¶

func (p *OpenAITTSProvider) ListVoices(ctx context.Context) ([]Voice, error)

ListVoices 返回可用的 OpenAI 语音。

func (*OpenAITTSProvider) Name ¶

func (p *OpenAITTSProvider) Name() string

func (*OpenAITTSProvider) Synthesize ¶

func (p *OpenAITTSProvider) Synthesize(ctx context.Context, req *TTSRequest) (*TTSResponse, error)

Synthesize 将文本转换为语音.

func (*OpenAITTSProvider) SynthesizeToFile ¶

func (p *OpenAITTSProvider) SynthesizeToFile(ctx context.Context, req *TTSRequest, filepath string) error

将文本转换为语音并保存为文件。

type STTProvider ¶

// STTProvider defines the interface implemented by speech-to-text (STT)
// providers.
type STTProvider interface {
	// Transcribe converts speech to text.
	Transcribe(ctx context.Context, req *STTRequest) (*STTResponse, error)

	// TranscribeFile transcribes an audio file.
	TranscribeFile(ctx context.Context, filepath string, opts *STTRequest) (*STTResponse, error)

	// Name returns the provider name.
	Name() string

	// SupportedFormats returns the supported audio formats.
	SupportedFormats() []string
}

STTProvider 定义 STT 提供者接口.

type STTRequest ¶

// STTRequest describes a speech-to-text request.
//
// Audio is excluded from JSON encoding (tag "-"); every other field is tagged
// omitempty.
// NOTE(review): presumably Audio and AudioURL are alternative input sources —
// confirm against the provider implementations.
type STTRequest struct {
	Audio                  io.Reader         `json:"-"`
	AudioURL               string            `json:"audio_url,omitempty"`
	Model                  string            `json:"model,omitempty"`
	Language               string            `json:"language,omitempty"`        // ISO-639-1 code
	Prompt                 string            `json:"prompt,omitempty"`          // Context hint
	ResponseFormat         string            `json:"response_format,omitempty"` // json, text, srt, vtt, verbose_json
	Temperature            float64           `json:"temperature,omitempty"`
	TimestampGranularities []string          `json:"timestamp_granularities,omitempty"` // word, segment
	Diarization            bool              `json:"diarization,omitempty"`             // Speaker identification
	Metadata               map[string]string `json:"metadata,omitempty"`
}

STTRequest 表示语音转文本请求.

type STTResponse ¶

// STTResponse is the response to a speech-to-text request.
//
// Provider, Model, Text and CreatedAt have no omitempty option; Language,
// Duration, Segments, Words and Confidence are tagged omitempty.
type STTResponse struct {
	Provider   string        `json:"provider"`
	Model      string        `json:"model"`
	Text       string        `json:"text"`
	Language   string        `json:"language,omitempty"`
	Duration   time.Duration `json:"duration,omitempty"`
	Segments   []Segment     `json:"segments,omitempty"`
	Words      []Word        `json:"words,omitempty"`
	Confidence float64       `json:"confidence,omitempty"`
	CreatedAt  time.Time     `json:"created_at"`
}

STTResponse 表示 STT 请求的响应.

type Segment ¶

// Segment is one segment of a transcription.
//
// Start and End are time.Duration values.
// NOTE(review): presumably offsets from the beginning of the audio — confirm.
// Speaker and Confidence are tagged omitempty.
type Segment struct {
	ID         int           `json:"id"`
	Start      time.Duration `json:"start"`
	End        time.Duration `json:"end"`
	Text       string        `json:"text"`
	Speaker    string        `json:"speaker,omitempty"`
	Confidence float64       `json:"confidence,omitempty"`
}

Segment 表示转录片段.

type TTSProvider ¶

// TTSProvider defines the interface implemented by text-to-speech (TTS)
// providers.
type TTSProvider interface {
	// Synthesize converts text to speech.
	Synthesize(ctx context.Context, req *TTSRequest) (*TTSResponse, error)

	// SynthesizeToFile converts text to speech and saves the result to a file.
	SynthesizeToFile(ctx context.Context, req *TTSRequest, filepath string) error

	// ListVoices returns the available voices.
	ListVoices(ctx context.Context) ([]Voice, error)

	// Name returns the provider name.
	Name() string
}

TTSProvider 定义 TTS 提供者接口.

type TTSRequest ¶

// TTSRequest describes a text-to-speech request.
//
// Text has no omitempty option; every other field is tagged omitempty.
type TTSRequest struct {
	Text           string            `json:"text"`
	Model          string            `json:"model,omitempty"`
	Voice          string            `json:"voice,omitempty"`
	Speed          float64           `json:"speed,omitempty"`           // 0.25-4.0
	ResponseFormat string            `json:"response_format,omitempty"` // mp3, opus, aac, flac, wav, pcm
	Language       string            `json:"language,omitempty"`
	Metadata       map[string]string `json:"metadata,omitempty"`
}

TTSRequest 表示文本转语音请求.

type TTSResponse ¶

// TTSResponse is the response to a text-to-speech request.
//
// Audio is excluded from JSON encoding (tag "-"). AudioData, Duration and
// CharCount are tagged omitempty; Provider, Model, Format and CreatedAt are not.
// NOTE(review): Audio is an io.ReadCloser, so presumably the caller is
// responsible for closing it — confirm against the provider implementations.
type TTSResponse struct {
	Provider  string        `json:"provider"`
	Model     string        `json:"model"`
	Audio     io.ReadCloser `json:"-"`                    // Audio stream
	AudioData []byte        `json:"audio_data,omitempty"` // Audio bytes (if buffered)
	Format    string        `json:"format"`
	Duration  time.Duration `json:"duration,omitempty"`
	CharCount int           `json:"char_count,omitempty"`
	CreatedAt time.Time     `json:"created_at"`
}

TTSResponse 表示 TTS 请求的响应.

type Voice ¶

// Voice describes one available voice.
//
// ID, Name and Language have no omitempty option; Gender, Description,
// PreviewURL and Labels are tagged omitempty.
type Voice struct {
	ID          string   `json:"id"`
	Name        string   `json:"name"`
	Language    string   `json:"language"`
	Gender      string   `json:"gender,omitempty"` // male, female, neutral
	Description string   `json:"description,omitempty"`
	PreviewURL  string   `json:"preview_url,omitempty"`
	Labels      []string `json:"labels,omitempty"`
}

Voice 表示一个可用的声音.

type Word ¶

// Word is a single transcribed word with timestamps.
//
// Start and End are time.Duration values.
// NOTE(review): presumably offsets from the beginning of the audio — confirm.
// Confidence and Speaker are tagged omitempty.
type Word struct {
	Word       string        `json:"word"`
	Start      time.Duration `json:"start"`
	End        time.Duration `json:"end"`
	Confidence float64       `json:"confidence,omitempty"`
	Speaker    string        `json:"speaker,omitempty"`
}

Word 表示带时间戳的转录词.

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL