speech

package

v0.10.0 Latest Latest Go to latest Published: Jun 16, 2025 License: MIT Imports: 2 Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/showntop/llmack

Links

Open Source Insights

Documentation ¶

Index ¶

func PcmToWav(chunk []byte, numchannel int, saplerate int) []byte
type ASR
type AgentPayload
type AudioChunk
type Outbound
type RealtimeTTS
type ResponsePayload
type StreamTTS
type Subtitle
type TTSResult
- func NewTTSResult() *TTSResult
type TTSResultPayload
type TextTTS
type TextTTSResult
type TranscriptionPayload
type TurnPayload

Constants ¶

This section is empty.

Variables ¶

This section is empty.

Functions ¶

func PcmToWav ¶

func PcmToWav(chunk []byte, numchannel int, saplerate int) []byte

PcmToWav TODO

* chunk: binary string (raw PCM bytes); numchannel: 1 = mono, 2 = multi-channel; saplerate: sample rate, 8000/16000

Types ¶

type ASR ¶

// ASR is an interface with a synchronous-looking Recognize call and an
// Input/Close pair for feeding audio incrementally.
type ASR interface {
	// Recognize takes an audio buffer and returns a string and an error.
	// NOTE(review): presumably the string is the recognized transcript — confirm.
	Recognize(buffer []byte) (string, error)
	Input(buffer []byte) error // feeds in audio data; async recognize
	Close() error              // NOTE(review): original comment duplicated Input's ("feeds in audio data; async recognize"), likely a copy-paste; presumably ends the input stream — confirm
}

ASR ...

type AgentPayload ¶

// AgentPayload pairs a turn identifier with a transcription stored by value.
type AgentPayload struct {
	TurnID        string               // NOTE(review): presumably identifies the conversation turn — confirm
	Transcription TranscriptionPayload // held by value (TurnPayload holds a pointer instead)
}

AgentPayload ...

type AudioChunk ¶

// AudioChunk groups an audio string, a text string and a list of LLM tool calls.
type AudioChunk struct {
	Audio     string         // NOTE(review): audio carried as a string; the encoding (e.g. base64) is not visible here — confirm
	Text      string         // text associated with this chunk
	ToolCalls []llm.ToolCall // tool calls, using the llm package's ToolCall type
}

AudioChunk ...

type Outbound ¶

// Outbound is a byte sink with Write, Reset and Close operations, each
// reporting failure through an error return.
type Outbound interface {
	// Write accepts a byte slice. Unlike io.Writer it returns only an error,
	// not a byte count.
	Write([]byte) error
	// NOTE(review): presumably discards pending/buffered output — confirm against implementations.
	Reset() error
	// NOTE(review): presumably releases the underlying destination — confirm.
	Close() error
}

Outbound ...

type RealtimeTTS ¶

// RealtimeTTS is a single-method text-to-speech interface.
type RealtimeTTS interface {
	// Synthesize takes a context and the text to synthesize and returns a
	// *TTSResult or an error.
	// NOTE(review): TTSResult carries a Data channel, so audio is presumably
	// delivered incrementally after this call returns — confirm.
	Synthesize(ctx context.Context, text string) (*TTSResult, error)
}

RealtimeTTS is the interface for text to speech

type ResponsePayload ¶

// ResponsePayload carries a text string, a byte chunk and two boolean markers.
type ResponsePayload struct {
	Text  string // text of the response
	Chunk []byte // NOTE(review): presumably a chunk of audio bytes — confirm
	First bool   // NOTE(review): presumably true on the first payload of a response — confirm
	Final bool   // NOTE(review): presumably true on the last payload of a response — confirm
}

ResponsePayload ...

type StreamTTS ¶

// StreamTTS is an interface for text-to-speech where text is supplied through
// Input and results are read from a channel of byte slices.
// NOTE(review): the expected call order (Prepare, Input..., Complete or
// Terminate) is inferred from the method names only — confirm.
type StreamTTS interface {
	// NOTE(review): presumably sets up the session before any Input — confirm.
	Prepare() error
	// Input accepts a string (presumably a piece of text to synthesize).
	Input(string) error
	// NOTE(review): presumably signals that no more input will follow — confirm.
	Complete() error
	// NOTE(review): presumably aborts the session — confirm.
	Terminate() error
	// StreamResult returns a bidirectional channel of byte slices.
	// NOTE(review): who closes the channel is not visible here — confirm.
	StreamResult() chan []byte
}

StreamTTS ...

type Subtitle ¶

// Subtitle describes one piece of text together with its timing and position
// information in synthesized speech.
type Subtitle struct {
	// Text content.
	Text string `json:"text,omitnil,omitempty" name:"text"`

	// Start timestamp of the TTS audio corresponding to the text, in ms.
	BeginTime int64 `json:"BeginTime,omitnil,omitempty" name:"BeginTime"`

	// End timestamp of the TTS audio corresponding to the text, in ms.
	EndTime int64 `json:"EndTime,omitnil,omitempty" name:"EndTime"`

	// Start position of this text in the timestamp array, zero-based.
	BeginIndex int64 `json:"BeginIndex,omitnil,omitempty" name:"BeginIndex"`

	// End position of this text in the timestamp array, zero-based.
	EndIndex int64 `json:"EndIndex,omitnil,omitempty" name:"EndIndex"`

	// Phoneme of this character.
	// Note: this field may come back as null, meaning no valid value was available.
	Phoneme string `json:"Phoneme,omitnil,omitempty" name:"Phoneme"`
}

type TTSResult ¶

// TTSResult holds identifiers, an audio data channel, an audio byte buffer
// and subtitles for a TTS response.
type TTSResult struct {
	SessionID string `json:"session_id"` // unique ID of the audio stream, generated by the client during the handshake and passed in the call parameters
	// RequestId string             `json:"request_id"` // unique ID of the audio stream, generated automatically by the server during the handshake
	MessageID string      `json:"message_id"` // unique ID of this message
	// NOTE(review): encoding/json cannot encode channel values, so
	// json.Marshal of a TTSResult would return an error because of this field
	// despite its tag — confirm whether `json:"-"` was intended.
	Data      chan []byte `json:"data"`       // latest speech-synthesis text result / audio stream data
	// NOTE(review): no json tag here, unlike the sibling fields — confirm intended.
	Audios    []byte
	Subtitles []Subtitle `json:"subtitles"`
}

TTSResult is the basic struct for the TTS response.

func NewTTSResult ¶

func NewTTSResult() *TTSResult

type TTSResultPayload ¶

// TTSResultPayload pairs a text string with a channel of byte slices.
type TTSResultPayload struct {
	Text   string      // NOTE(review): presumably the text that was synthesized — confirm
	Stream chan []byte // NOTE(review): presumably streams the synthesized audio; channel ownership/closing is not visible here — confirm
}

TTSResultPayload ...

type TextTTS ¶

// TextTTS is a single-method text-to-speech interface whose result is a
// TextTTSResult rather than a TTSResult.
type TextTTS interface {
	// Synthesize takes a context and the text to synthesize and returns a
	// *TextTTSResult or an error.
	Synthesize(ctx context.Context, text string) (*TextTTSResult, error)
}

TextTTS is the interface for text to speech

type TextTTSResult ¶

// TextTTSResult is the result type returned by TextTTS.Synthesize: an audio
// string plus a list of subtitles.
type TextTTSResult struct {
	Audio     string     // NOTE(review): audio carried as a string; the encoding (e.g. base64) is not visible here — confirm
	Subtitles []Subtitle // subtitle entries for the audio
}

type TranscriptionPayload ¶

// TranscriptionPayload carries transcribed text and a finality flag.
type TranscriptionPayload struct {
	Text  string // transcription text
	Final bool   // NOTE(review): presumably true when the transcription is final rather than interim — confirm
}

TranscriptionPayload ...

type TurnPayload ¶

// TurnPayload pairs a turn identifier with a pointer to a transcription.
type TurnPayload struct {
	TurnID        string                // NOTE(review): presumably identifies the conversation turn — confirm
	Transcription *TranscriptionPayload // pointer, so it may be nil (AgentPayload holds the value instead)
}

TurnPayload ...

Source Files ¶

View all Source files

Directories ¶

Path	Synopsis
asr
tencent
outbound
tts
aliyun
minmax
tencent

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL