speech

package
v0.10.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jun 16, 2025 License: MIT Imports: 2 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func PcmToWav

func PcmToWav(chunk []byte, numchannel int, saplerate int) []byte

PcmToWav TODO

* chunk: binary audio data; numchannel: 1 = mono, 2 = stereo; saplerate: sample rate, 8000 or 16000

Types

type ASR

// ASR groups a one-shot Recognize call with a streaming Input/Close pair.
// NOTE(review): "ASR" presumably stands for automatic speech recognition;
// the declaration itself does not spell it out.
type ASR interface {
	// Recognize takes a byte buffer and returns a string and an error.
	// NOTE(review): presumably the string is the transcript of the audio
	// held in buffer — confirm against an implementation.
	Recognize(buffer []byte) (string, error)
	Input(buffer []byte) error // streams audio data in; recognition is asynchronous
	Close() error              // NOTE(review): the original comment duplicated Input's; presumably ends the stream — confirm
}

ASR ...

type AgentPayload

// AgentPayload pairs a turn identifier with a transcription held by value.
type AgentPayload struct {
	// TurnID identifies the turn. NOTE(review): format and origin of the id
	// are not visible here.
	TurnID        string
	// Transcription is embedded by value, so it is always present.
	Transcription TranscriptionPayload
}

AgentPayload ...

type AudioChunk

// AudioChunk bundles an audio string, a text string, and a list of tool calls.
type AudioChunk struct {
	// Audio carries audio as a string. NOTE(review): the encoding (for
	// example base64) is not visible here — confirm with the producer.
	Audio     string
	// Text is the text carried alongside the audio.
	Text      string
	// ToolCalls holds llm.ToolCall values defined in the llm package.
	ToolCalls []llm.ToolCall
}

AudioChunk ...

type Outbound

// Outbound is a destination for byte slices that can also be reset and closed.
type Outbound interface {
	// Write accepts a byte slice and returns only an error; unlike io.Writer
	// it reports no byte count.
	Write([]byte) error
	// Reset returns an error. NOTE(review): which state it clears is not
	// visible here — confirm against an implementation.
	Reset() error
	// Close returns an error. NOTE(review): presumably releases the
	// underlying destination — confirm.
	Close() error
}

Outbound ...

type RealtimeTTS

// RealtimeTTS is a single-method text-to-speech interface whose result is a
// *TTSResult.
type RealtimeTTS interface {
	// Synthesize takes a context and the text to synthesize and returns a
	// *TTSResult or an error.
	// NOTE(review): presumably ctx cancels an in-flight synthesis — confirm
	// against an implementation.
	Synthesize(ctx context.Context, text string) (*TTSResult, error)
}

RealtimeTTS is the interface for realtime text to speech; Synthesize returns a *TTSResult.

type ResponsePayload

// ResponsePayload carries a text string, a byte chunk, and two boolean
// position flags.
type ResponsePayload struct {
	// Text is the text part of the payload.
	Text  string
	// Chunk is a byte slice. NOTE(review): presumably audio bytes matching
	// Text — confirm with the producer.
	Chunk []byte
	// First and Final are flags. NOTE(review): presumably they mark the first
	// and last payload of a response sequence — confirm.
	First bool
	Final bool
}

ResponsePayload ...

type StreamTTS

// StreamTTS accepts text through Input and exposes its output as a channel of
// byte slices via StreamResult.
// NOTE(review): the required call order of the methods is not visible here;
// the names suggest Prepare, then Input, then Complete or Terminate — confirm.
type StreamTTS interface {
	// Prepare takes no arguments and returns an error.
	Prepare() error
	// Input accepts a string and returns an error.
	Input(string) error
	// Complete returns an error. NOTE(review): presumably signals that no
	// more input will follow — confirm.
	Complete() error
	// Terminate returns an error. NOTE(review): how it differs from Complete
	// is not visible here — confirm.
	Terminate() error
	// StreamResult returns a bidirectional channel of byte slices.
	// NOTE(review): who closes the channel is not visible here.
	StreamResult() chan []byte
}

StreamTTS ...

type Subtitle

// Subtitle describes one piece of text and its timing within synthesized audio.
// NOTE(review): every tag carries "omitnil", which is not an encoding/json
// option; presumably it is consumed by another library — confirm. Also note
// that the Text field's JSON key is lowercase "text" while the other keys are
// capitalized.
type Subtitle struct {
	// Text content.
	Text string `json:"text,omitnil,omitempty" name:"text"`

	// Start timestamp of the TTS audio for this text, in ms.
	BeginTime int64 `json:"BeginTime,omitnil,omitempty" name:"BeginTime"`

	// End timestamp of the TTS audio for this text, in ms.
	EndTime int64 `json:"EndTime,omitnil,omitempty" name:"EndTime"`

	// Start position of this text in the timestamp array, zero-based.
	BeginIndex int64 `json:"BeginIndex,omitnil,omitempty" name:"BeginIndex"`

	// End position of this text in the timestamp array, zero-based.
	EndIndex int64 `json:"EndIndex,omitnil,omitempty" name:"EndIndex"`

	// Phoneme of this character.
	// Note: this field may come back as null, meaning no valid value was available.
	Phoneme string `json:"Phoneme,omitnil,omitempty" name:"Phoneme"`
}

type TTSResult

// TTSResult is the basic struct for the TTS response.
//
// The Data channel is excluded from JSON. encoding/json cannot encode channel
// values, so tagging it with a key made json.Marshal fail for every TTSResult
// and made json.Unmarshal fail whenever the input carried a non-null "data"
// key.
type TTSResult struct {
	// SessionID is the unique id of the audio stream, generated by the client
	// during the handshake and passed in the call parameters.
	SessionID string `json:"session_id"`

	// RequestId string `json:"request_id"` // unique id of the audio stream, generated by the server during the handshake

	// MessageID is the unique id of this message.
	MessageID string `json:"message_id"`

	// Data carries the latest synthesized text result / audio stream data.
	// It is skipped by encoding/json because channels are not serializable.
	Data chan []byte `json:"-"`

	// Audios is a byte slice with no JSON tag, so encoding/json uses the key
	// "Audios". NOTE(review): presumably the accumulated audio — confirm.
	Audios []byte

	// Subtitles lists the Subtitle entries returned with the result.
	Subtitles []Subtitle `json:"subtitles"`
}

TTSResult is the basic struct for the TTS response.

func NewTTSResult

func NewTTSResult() *TTSResult

type TTSResultPayload

// TTSResultPayload pairs a text string with a channel of byte slices.
type TTSResultPayload struct {
	// Text is the text part of the payload. NOTE(review): presumably the text
	// that was synthesized — confirm.
	Text   string
	// Stream is a bidirectional channel of byte slices. NOTE(review):
	// presumably delivers the synthesized audio; who closes it is not visible
	// here.
	Stream chan []byte
}

TTSResultPayload ...

type TextTTS

// TextTTS is a single-method text-to-speech interface whose result is a
// *TextTTSResult.
type TextTTS interface {
	// Synthesize takes a context and the text to synthesize and returns a
	// *TextTTSResult or an error.
	Synthesize(ctx context.Context, text string) (*TextTTSResult, error)
}

TextTTS is the interface for text to speech; Synthesize returns a *TextTTSResult.

type TextTTSResult

// TextTTSResult is the value returned through TextTTS.Synthesize: an audio
// string plus a list of subtitles.
type TextTTSResult struct {
	// Audio carries audio as a string. NOTE(review): the encoding (for
	// example base64) is not visible here — confirm with the producer.
	Audio     string
	// Subtitles lists the Subtitle entries that accompany the audio.
	Subtitles []Subtitle
}

type TranscriptionPayload

// TranscriptionPayload carries a text string and a boolean Final flag.
type TranscriptionPayload struct {
	// Text is the transcription text.
	Text  string
	// Final is a flag. NOTE(review): presumably true once the transcription
	// will no longer be revised — confirm with the producer.
	Final bool
}

TranscriptionPayload ...

type TurnPayload

// TurnPayload pairs a turn identifier with an optional transcription.
type TurnPayload struct {
	// TurnID identifies the turn. NOTE(review): format and origin of the id
	// are not visible here.
	TurnID        string
	// Transcription is a pointer and may therefore be nil, unlike the
	// by-value field of the same name in AgentPayload.
	Transcription *TranscriptionPayload
}

TurnPayload ...

Directories

Path Synopsis
asr
tts

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL