Versions in this module Expand all Collapse all v1 v1.2.0 Jun 6, 2026 Changes in this version + const CompressionGZIP + const DefaultSampleRate + const FlagNegSequence + const FlagNegWithSequence + const FlagNoSequence + const FlagPosSequence + const MessageTypeClientAudioOnlyRequest + const MessageTypeClientFullRequest + const MessageTypeServerErrorResponse + const MessageTypeServerFullResponse + const ProtocolVersionV1 + const SerializationJSON + const SerializationNone + const ServerAck + const ServerErrorResponse + const ServerFullResponse + const SuccessCode + var DefaultAudioOnlyWsHeader = []byte + var DefaultFullClientWsHeader = []byte + var DefaultLastAudioWsHeader = []byte + var ErrClientClosed = errors.New("asr client closed") + var FussyMap = map[string]string + func ASRFilterSenderName(senderName, direction string, h media.MediaHandler) string + func AudioIntercept() media.MediaHandlerFunc + func BuildAuthHeader(auth AuthConfig) http.Header + func ComputeSampleByteCount(sampleRate, bitDepth, channels int) int + func ConvertToWAV(audioPath string, sampleRate int) ([]byte, error) + func GenerateCorpusContext(hotwords []HotWord) string + func GzipCompress(input []byte) []byte + func GzipDecompress(input []byte) []byte + func IsWAVFile(data []byte) bool + func NewAudioOnlyRequest(seq int, segment []byte) []byte + func NewFullClientRequest(config *Config) []byte + func ReadWAVInfo(data []byte) (int, int, int, int, []byte, error) + func WithAwsASR(opt AwsASROption) media.MediaHandlerFunc + func WithBaiduASR(opt BaiduASROption) media.MediaHandlerFunc + func WithDeepgramASR(opt DeepgramASROption) media.MediaHandlerFunc + func WithQCloudASR(opt QCloudASROption) media.MediaHandlerFunc + func WithTranscribeFilter(asr SpeechRecognitionEngine, h media.MediaHandler, opt TranscribeOption) media.PacketFilter + func WithTranscribeFilterState(asr SpeechRecognitionEngine, h media.MediaHandler, opt TranscribeOption) media.PacketFilter + func WithTranscribeFilterStateV2(asr SpeechRecognitionEngine, h media.MediaHandler, opt TranscribeOption) media.PacketFilter + func WithVoiceapiASR(opt VoiceapiASROption) media.MediaHandlerFunc + func WithVolcengineASR(opt VolcengineOption) media.MediaHandlerFunc + func WithWhisperASR(opt WhisperASROption) media.MediaHandlerFunc + type AsrCorrector struct + FuzzyWords map[string]string + ReplaceWords map[string]string + func NewAsrCorrector(opt AsrCorrectorOption) *AsrCorrector + func (ac *AsrCorrector) Correct(text string) string + func (ac *AsrCorrector) SegmentWords(s string) []string + type AsrCorrectorOption struct + FuzzyWords map[string]string + ReplaceWords map[string]string + type AudioConfig struct + Bits int + Channel int + Codec string + Format string + Rate int + type AudioFrame struct + Data []byte + IsEnd bool + type AudioMeta struct + Bits int + Channel int + Codec string + Format string + Rate int + type AuthConfig struct + AccessKey string + AppKey string + ResourceId string + type AwsASR struct + type AwsASROption struct + AppID string + Encoding types.MediaEncoding + Region string + ReqChanSize int + SampleRate int32 + func NewAwsASROption(appId, region string, language string) AwsASROption + func (opt *AwsASROption) GetVendor() Vendor + type BaiduASR struct + Sentence string + type BaiduASRBeginParam struct + Data Data + Type string + func NewBeginParam(opt BaiduASROption) BaiduASRBeginParam + type BaiduASROption struct + AppID int + AppKey string + CuId string + DevPid int + Format string + LmId int + ReqChanSize int + Sample int + Url string + func NewBaiduASROption(appId int, appKey string, devPid int, format string, sample int) BaiduASROption + func (opt *BaiduASROption) GetVendor() Vendor + type BaiduASRWSResponse struct + EndTime int + ErrMsg string + ErrNo int + LogId int + Result string + Sn string + StartTime int + Type string + type BufferConfig struct + MaxBufferSize int + SegmentDurationMs int + type Client struct + func NewClient(config *Config) *Client + func (c *Client) Close() + func (c *Client) Connect(ctx context.Context) error + func (c *Client) GetTraceID() string + func (c *Client) IsClosed() bool + func (c *Client) ReceiveResult() (*Response, error) + func (c *Client) SendAudioFrame(frame *AudioFrame) error + func (c *Client) SetErrorCallback(handler func(error)) + func (c *Client) SetTimeouts(sendTimeout, recvTimeout time.Duration) + type CompressionType byte + type Config struct + Audio AudioConfig + Auth AuthConfig + Buffer BufferConfig + Request RequestConfig + URL string + User UserConfig + func DefaultConfig() *Config + func (c *Config) CalculateBufferSize() int + func (c *Config) WithAudio(audio AudioConfig) *Config + func (c *Config) WithAuth(auth AuthConfig) *Config + func (c *Config) WithBuffer(buffer BufferConfig) *Config + func (c *Config) WithRequest(request RequestConfig) *Config + func (c *Config) WithURL(url string) *Config + func (c *Config) WithUser(user UserConfig) *Config + type ConfigReader struct + func NewConfigReader(config map[string]interface{}) *ConfigReader + func (r *ConfigReader) Int(keys ...interface{}) int + func (r *ConfigReader) String(keysAndDefault ...string) string + type CorpusConfig struct + BoostingTableName string + Context string + CorrectTableName string + type CorpusMeta struct + BoostingTableName string + Context string + CorrectTableName string + type Data struct + AppId int + AppKey string + CuId string + DevPid int + Format string + LmId int + Sample int + type DeepgramASR struct + EndTime uint32 + Sentence string + func (dg *DeepgramASR) Close(cr *interfacesv1.CloseResponse) error + func (dg *DeepgramASR) Error(er *interfacesv1.ErrorResponse) error + func (dg *DeepgramASR) Message(mr *interfacesv1.MessageResponse) error + func (dg *DeepgramASR) Metadata(md *interfacesv1.MetadataResponse) error + func (dg *DeepgramASR) Open(or *interfacesv1.OpenResponse) error + func (dg *DeepgramASR) SpeechStarted(ssr *interfacesv1.SpeechStartedResponse) error + func (dg *DeepgramASR) UnhandledEvent(byData []byte) error + func (dg *DeepgramASR) UtteranceEnd(ur *interfacesv1.UtteranceEndResponse) error + type DeepgramASROption struct + ApiKey string + Channels int + Encoding string + KeepAliveDuration string + Language string + Model string + ReqChanSize int + SampleRate int + func NewDeepgramASROption(apiKey string, model string, language string) DeepgramASROption + func (opt *DeepgramASROption) GetVendor() Vendor + type DefaultTranscriberFactory struct + func GetGlobalFactory() *DefaultTranscriberFactory + func NewTranscriberFactory() *DefaultTranscriberFactory + func (f *DefaultTranscriberFactory) CreateTranscriber(config TranscriberConfig) (SpeechRecognitionEngine, error) + func (f *DefaultTranscriberFactory) GetSupportedVendors() []Vendor + func (f *DefaultTranscriberFactory) IsVendorSupported(vendor Vendor) bool + func (f *DefaultTranscriberFactory) RegisterCreator(vendor Vendor, ...) + type Event struct + Header FunHeader + Payload FunPayload + type FunASRCallback struct + func NewFunASR(opt FunASROption) FunASRCallback + func (fun *FunASRCallback) Activity() bool + func (fun *FunASRCallback) ConnAndReceive(dialogID string) error + func (fun *FunASRCallback) Init(tr SpeechRecognitionResult, er RecognitionError) + func (fun *FunASRCallback) RestartClient() + func (fun *FunASRCallback) SendAudioBytes(data []byte) error + func (fun *FunASRCallback) SendEnd() error + func (fun *FunASRCallback) StopConn() error + func (fun *FunASRCallback) Vendor() string + type FunASRClient struct + type FunASRMessage struct + IsFinal bool + Mode string + Text string + type FunASROption struct + AudioFs int + ChunkInterval int + ChunkSize []int + DecoderChunkLookBack int + EncoderChunkLookBack int + Hotwords string + IsSpeaking bool + Itn bool + Mode string + ReqChanSize int + Url string + WavFormat string + WavName string + func NewFunASROption(url string) FunASROption + func (opt *FunASROption) GetVendor() Vendor + type FunASRRequestOption struct + AudioFs int + ChunkInterval int + ChunkSize []int + DecoderChunkLookBack int + EncoderChunkLookBack int + Hotwords string + IsSpeaking bool + Itn bool + Mode string + WavFormat string + WavName string + func NewFunASRRequestOption(opt FunASROption) FunASRRequestOption + type FunAsrRealtime struct + Handler media.MediaHandler + func NewFunAsrRealtime(opt FunAsrRealtimeOption) FunAsrRealtime + func (fun *FunAsrRealtime) Activity() bool + func (fun *FunAsrRealtime) ConnAndReceive(dialogID string) error + func (fun *FunAsrRealtime) Init(tr SpeechRecognitionResult, er RecognitionError) + func (fun *FunAsrRealtime) RestartClient() + func (fun *FunAsrRealtime) SendAudioBytes(data []byte) error + func (fun *FunAsrRealtime) SendEnd() error + func (fun *FunAsrRealtime) StopConn() error + func (fun *FunAsrRealtime) Vendor() string + type FunAsrRealtimeClient struct + type FunAsrRealtimeOption struct + ApiKey string + DisfluencyRemovalEnabled bool + EnableITN bool + EnableWords bool + Format string + Heartbeat bool + LanguageHints string + MaxSentenceSilence uint + Model string + SampleRate int + Url string + func (opt *FunAsrRealtimeOption) GetVendor() Vendor + type FunHeader struct + Action string + Attributes map[string]interface{} + ErrorCode string + ErrorMessage string + Event string + Streaming string + TaskID string + type FunPayload struct + Function string + Input Input + Model string + Output Output + Parameters Params + Task string + TaskGroup string + Usage ... + type GladiaASR struct + Sentence string + func NewGladiaASR(opt GladiaASROption) GladiaASR + func (gla *GladiaASR) Activity() bool + func (gla *GladiaASR) ConnAndReceive(dialogID string) error + func (gla *GladiaASR) Init(tr SpeechRecognitionResult, er RecognitionError) + func (gla *GladiaASR) RestartClient() + func (gla *GladiaASR) SendAudioBytes(data []byte) error + func (gla *GladiaASR) SendEnd() error + func (gla *GladiaASR) StopConn() error + func (gla *GladiaASR) Vendor() string + type GladiaASROption struct + ApiKey string + Encoding string + ReqChanSize int + Url string + func NewGladiaASROption(apiKey string, encoding string) GladiaASROption + func (opt *GladiaASROption) GetVendor() Vendor + type GladiaUtterance struct + Confidence float64 + ID int + Language string + Stable bool + TimeBegin float64 + TimeEnd float64 + Transcription string + type GoogleASR struct + Sentence string + func NewGoogleASR(opt GoogleASROption) GoogleASR + func (google *GoogleASR) Activity() bool + func (google *GoogleASR) ConnAndReceive(dialogID string) error + func (google *GoogleASR) Init(tr SpeechRecognitionResult, er RecognitionError) + func (google *GoogleASR) RestartClient() + func (google *GoogleASR) SendAudioBytes(data []byte) error + func (google *GoogleASR) SendEnd() error + func (google *GoogleASR) StopConn() error + func (google *GoogleASR) Vendor() string + type GoogleASROption struct + Encoding speechpb.RecognitionConfig_AudioEncoding + LanguageCode string + ReqChainSize int + SampleRateHertz int32 + func NewGoogleASROption(encoding speechpb.RecognitionConfig_AudioEncoding, sampleRateHertz int32, ...) GoogleASROption + func (opt *GoogleASROption) GetVendor() Vendor + type HotWord struct + Weight int + Word string + type Input struct + type LocalASRConfig struct + BitDepth int + BufferSize int + Channels int + Command string + EnableVAD bool + Language string + ModelPath string + Provider LocalASRProvider + SampleRate int + VADThreshold float32 + func NewLocalASRConfig(provider LocalASRProvider, modelPath string) *LocalASRConfig + func (opt *LocalASRConfig) GetVendor() Vendor + type LocalASRProvider string + const LocalASRProviderLocal + const LocalASRProviderWhisperCpp + type LocalASRService struct + func NewLocalASRService(config *LocalASRConfig) (*LocalASRService, error) + func (s *LocalASRService) Activity() bool + func (s *LocalASRService) Close() error + func (s *LocalASRService) ConnAndReceive(dialogId string) error + func (s *LocalASRService) Init(tr SpeechRecognitionResult, er RecognitionError) + func (s *LocalASRService) RestartClient() + func (s *LocalASRService) SendAudioBytes(data []byte) error + func (s *LocalASRService) SendEnd() error + func (s *LocalASRService) StopConn() error + func (s *LocalASRService) Vendor() string + type MessageType byte + type MessageTypeSpecificFlags byte + type Output struct + Sentence struct{ ... } + type Params struct + DisfluencyRemovalEnabled bool + Format string + Heartbeat bool + LanguageHints []string + MaxSentenceSilence uint + SampleRate int + VocabularyID string + type ProtocolHeader struct + func NewDefaultHeader() *ProtocolHeader + func (h *ProtocolHeader) Serialize() []byte + func (h *ProtocolHeader) SetCompressionType(compType CompressionType) *ProtocolHeader + func (h *ProtocolHeader) SetMessageType(msgType MessageType) *ProtocolHeader + func (h *ProtocolHeader) SetMessageTypeFlags(flags MessageTypeSpecificFlags) *ProtocolHeader + func (h *ProtocolHeader) SetReservedData(data []byte) *ProtocolHeader + func (h *ProtocolHeader) SetSerializationType(serType SerializationType) *ProtocolHeader + type ProtocolVersion byte + type QCloudASR struct + Handler media.MediaHandler + func NewQcloudASR(opt QCloudASROption) *QCloudASR + func (asq *QCloudASR) Activity() bool + func (asq *QCloudASR) ConnAndReceive(dialogID string) error + func (asq *QCloudASR) Init(tr SpeechRecognitionResult, er RecognitionError) + func (asq *QCloudASR) OnFail(response *asr.SpeechRecognitionResponse, err error) + func (asq *QCloudASR) OnRecognitionComplete(response *asr.SpeechRecognitionResponse) + func (asq *QCloudASR) OnRecognitionResultChange(response *asr.SpeechRecognitionResponse) + func (asq *QCloudASR) OnRecognitionStart(response *asr.SpeechRecognitionResponse) + func (asq *QCloudASR) OnSentenceBegin(response *asr.SpeechRecognitionResponse) + func (asq *QCloudASR) OnSentenceEnd(response *asr.SpeechRecognitionResponse) + func (asq *QCloudASR) RestartClient() + func (asq *QCloudASR) SendAudioBytes(data []byte) error + func (asq *QCloudASR) SendEnd() error + func (asq *QCloudASR) StopConn() error + func (asq *QCloudASR) Vendor() string + type QCloudASROption struct + AppID string + Format int + HotWords []HotWord + ModelType string + ReqChanSize int + SecretID string + SecretKey string + func NewQcloudASROption(appId string, secretId string, secretKey string) QCloudASROption + func (opt *QCloudASROption) GetVendor() Vendor + func (opt QCloudASROption) String() string + type RecognitionError func(err error, isFatal bool) + type Recognizer struct + func NewRecognizer(config *Config) *Recognizer + func (r *Recognizer) GetTraceID() string + func (r *Recognizer) OnError(callback onErrorFunc) + func (r *Recognizer) OnResult(callback onResultFunc) + func (r *Recognizer) SendAudioFrame(frame []byte, end bool) error + func (r *Recognizer) Start() error + func (r *Recognizer) Stop() + type RequestConfig struct + Corpus CorpusConfig + EnableDDC bool + EnableITN bool + EnableNonstream bool + EnablePUNC bool + ModelName string + ShowUtterances bool + type RequestMeta struct + Corpus CorpusMeta + EnableDDC bool + EnableITN bool + EnableNonstream bool + EnablePUNC bool + ModelName string + ShowUtterances bool + type RequestPayload struct + Audio AudioMeta + Request RequestMeta + User UserMeta + type Response struct + Code int + Err error + Event int + IsLastPackage bool + PayloadMsg *ResponsePayload + PayloadSequence int32 + PayloadSize int + func ParseResponse(msg []byte) *Response + type ResponsePayload struct + AudioInfo struct{ ... } + Error string + Result struct{ ... } + type Result struct + Error error + IsFinal bool + Text string + Timestamp time.Time + type ResultCallback func(*Result) + type RingBuffer struct + func NewRingBuffer(size int) *RingBuffer + func (r *RingBuffer) Read(n int) []byte + func (r *RingBuffer) Write(data []byte) int + type SerializationType byte + type SpeechRecognitionEngine interface + Activity func() bool + ConnAndReceive func(dialogId string) error + Init func(resultCallback SpeechRecognitionResult, errorCallback RecognitionError) + RestartClient func() + SendAudioBytes func(data []byte) error + SendEnd func() error + StopConn func() error + Vendor func() string + type SpeechRecognitionResult func(text string, isLast bool, duration time.Duration, uuid string) + type TimeoutConfig struct + Read time.Duration + Send time.Duration + type TranscribeOneShot struct + Padding int + type TranscribeOption struct + AsrOptions map[string]any + Direction string + FuzzyOptions AsrCorrectorOption + type TranscriberConfig interface + GetVendor func() Vendor + func NewTranscriberConfigFromMap(provider string, config map[string]interface{}, language string) (TranscriberConfig, error) + type TranscriberFactory interface + CreateTranscriber func(config TranscriberConfig) (SpeechRecognitionEngine, error) + GetSupportedVendors func() []Vendor + IsVendorSupported func(vendor Vendor) bool + type Transcript struct + Code string + Confidence float64 + Duration float64 + Event string + InferenceTime float64 + Language string + Message string + RequestID string + TimeBegin float64 + TimeEnd float64 + Transcription string + Type string + Utterances []GladiaUtterance + type UserConfig struct + APPVersion string + DID string + Platform string + SDKVersion string + UID string + type UserMeta struct + APPVersion string + DID string + Platform string + SDKVersion string + UID string + type Utterance struct + Definite bool + EndTime int + Language string + StartTime int + Text string + Words []Word + type Vendor string + const VendorAWS + const VendorAliyun + const VendorBaidu + const VendorDeepgram + const VendorFunASR + const VendorFunASRRealtime + const VendorGladia + const VendorGoogle + const VendorLocal + const VendorQCloud + const VendorVoiceAPI + const VendorVolcengine + const VendorVolcengineLLM + const VendorWhisper + const VendorXfyunMul + func GetVendor(provider string) Vendor + type VoiceapiASR struct + Sentence string + type VoiceapiASROption struct + ReqChanSize int + Url string + func NewVoiceapiASROption(url string) VoiceapiASROption + func (opt *VoiceapiASROption) GetVendor() Vendor + type VoiceapiResponse struct + Finished bool + Idx int + Text string + type VolcEngineResponse struct + Code int + Message string + Reqid string + Results []VolcengineResult + Sequence int + type Volcengine struct + Sentence string + type VolcengineClient struct + func (c *VolcengineClient) String() string + type VolcengineLLMASR struct + func NewVolcengineLLM(opt VolcengineLLMOption) VolcengineLLMASR + func (v *VolcengineLLMASR) Activity() bool + func (v *VolcengineLLMASR) ConnAndReceive(dialogID string) error + func (v *VolcengineLLMASR) Init(tr SpeechRecognitionResult, er RecognitionError) + func (v *VolcengineLLMASR) RestartClient() + func (v *VolcengineLLMASR) SendAudioBytes(data []byte) error + func (v *VolcengineLLMASR) SendEnd() error + func (v *VolcengineLLMASR) StopConn() error + func (v *VolcengineLLMASR) Vendor() string + type VolcengineLLMOption struct + AccessToken string + AppID string + BitDepth int + Channel int + Codec string + Format string + HotWords []HotWord + ReqChanSize int + ResourceId string + SampleRate int + Url string + func NewVolcengineLLMOption(token, appID string) VolcengineLLMOption + func (opt *VolcengineLLMOption) GetVendor() Vendor + type VolcengineOption struct + AppID string + Cluster string + Codec string + Format string + ReqChanSize int + Token string + Url string + WorkFlow string + func NewVolcengineOption(appId string, token string, cluster string, format string) VolcengineOption + func (opt *VolcengineOption) GetVendor() Vendor + type VolcengineResult struct + Confidence int + Language string + Text string + Utterances []Utterance + type WAVHeader struct + AudioFormat uint16 + BitsPerSample uint16 + BlockAlign uint16 + ByteRate uint32 + ChunkID [4]byte + ChunkSize uint32 + Format [4]byte + NumChannels uint16 + SampleRate uint32 + Subchunk1ID [4]byte + Subchunk1Size uint32 + Subchunk2ID [4]byte + Subchunk2Size uint32 + type WhisperASR struct + EndTime uint32 + Sentence string + type WhisperASROption struct + Model string + ReqChanSize int + Url string + func NewWhisperASROption(url, model string) WhisperASROption + func (opt *WhisperASROption) GetVendor() Vendor + type WhisperResult struct + IsFinal bool + Text string + type Word struct + BlankDuration int + EndTime int + Pronounce string + StartTime int + Text string