Versions in this module Expand all Collapse all v1 v1.4.2 Jun 8, 2026 v1.4.1 Jun 8, 2026 v1.4.0 Jun 8, 2026 v1.3.3 Jun 8, 2026 v1.3.2 Jun 8, 2026 v1.3.1 Jun 8, 2026 v1.3.0 Jun 8, 2026 v1.2.0 Jun 6, 2026 Changes in this version + const CompressionGZIP + const DefaultSampleRate + const FlagNegSequence + const FlagNegWithSequence + const FlagNoSequence + const FlagPosSequence + const MessageTypeClientAudioOnlyRequest + const MessageTypeClientFullRequest + const MessageTypeServerErrorResponse + const MessageTypeServerFullResponse + const ProtocolVersionV1 + const SerializationJSON + const SerializationNone + const ServerAck + const ServerErrorResponse + const ServerFullResponse + const SuccessCode + var DefaultAudioOnlyWsHeader = []byte + var DefaultFullClientWsHeader = []byte + var DefaultLastAudioWsHeader = []byte + var ErrClientClosed = errors.New("asr client closed") + var FussyMap = map[string]string + func ASRFilterSenderName(senderName, direction string, h media.MediaHandler) string + func AudioIntercept() media.MediaHandlerFunc + func BuildAuthHeader(auth AuthConfig) http.Header + func ComputeSampleByteCount(sampleRate, bitDepth, channels int) int + func ConvertToWAV(audioPath string, sampleRate int) ([]byte, error) + func GenerateCorpusContext(hotwords []HotWord) string + func GzipCompress(input []byte) []byte + func GzipDecompress(input []byte) []byte + func IsWAVFile(data []byte) bool + func NewAudioOnlyRequest(seq int, segment []byte) []byte + func NewFullClientRequest(config *Config) []byte + func ReadWAVInfo(data []byte) (int, int, int, int, []byte, error) + func WithAwsASR(opt AwsASROption) media.MediaHandlerFunc + func WithBaiduASR(opt BaiduASROption) media.MediaHandlerFunc + func WithDeepgramASR(opt DeepgramASROption) media.MediaHandlerFunc + func WithQCloudASR(opt QCloudASROption) media.MediaHandlerFunc + func WithTranscribeFilter(asr SpeechRecognitionEngine, h media.MediaHandler, opt TranscribeOption) media.PacketFilter + func WithTranscribeFilterState(asr SpeechRecognitionEngine, h media.MediaHandler, opt TranscribeOption) media.PacketFilter + func WithTranscribeFilterStateV2(asr SpeechRecognitionEngine, h media.MediaHandler, opt TranscribeOption) media.PacketFilter + func WithVoiceapiASR(opt VoiceapiASROption) media.MediaHandlerFunc + func WithVolcengineASR(opt VolcengineOption) media.MediaHandlerFunc + func WithWhisperASR(opt WhisperASROption) media.MediaHandlerFunc + type AsrCorrector struct + FuzzyWords map[string]string + ReplaceWords map[string]string + func NewAsrCorrector(opt AsrCorrectorOption) *AsrCorrector + func (ac *AsrCorrector) Correct(text string) string + func (ac *AsrCorrector) SegmentWords(s string) []string + type AsrCorrectorOption struct + FuzzyWords map[string]string + ReplaceWords map[string]string + type AudioConfig struct + Bits int + Channel int + Codec string + Format string + Rate int + type AudioFrame struct + Data []byte + IsEnd bool + type AudioMeta struct + Bits int + Channel int + Codec string + Format string + Rate int + type AuthConfig struct + AccessKey string + AppKey string + ResourceId string + type AwsASR struct + type AwsASROption struct + AppID string + Encoding types.MediaEncoding + Region string + ReqChanSize int + SampleRate int32 + func NewAwsASROption(appId, region string, language string) AwsASROption + func (opt *AwsASROption) GetVendor() Vendor + type BaiduASR struct + Sentence string + type BaiduASRBeginParam struct + Data Data + Type string + func NewBeginParam(opt BaiduASROption) BaiduASRBeginParam + type BaiduASROption struct + AppID int + AppKey string + CuId string + DevPid int + Format string + LmId int + ReqChanSize int + Sample int + Url string + func NewBaiduASROption(appId int, appKey string, devPid int, format string, sample int) BaiduASROption + func (opt *BaiduASROption) GetVendor() Vendor + type BaiduASRWSResponse struct + EndTime int + ErrMsg string + ErrNo int + LogId int + Result string + Sn string + StartTime int + Type string + type BufferConfig struct + MaxBufferSize int + SegmentDurationMs int + type Client struct + func NewClient(config *Config) *Client + func (c *Client) Close() + func (c *Client) Connect(ctx context.Context) error + func (c *Client) GetTraceID() string + func (c *Client) IsClosed() bool + func (c *Client) ReceiveResult() (*Response, error) + func (c *Client) SendAudioFrame(frame *AudioFrame) error + func (c *Client) SetErrorCallback(handler func(error)) + func (c *Client) SetTimeouts(sendTimeout, recvTimeout time.Duration) + type CompressionType byte + type Config struct + Audio AudioConfig + Auth AuthConfig + Buffer BufferConfig + Request RequestConfig + URL string + User UserConfig + func DefaultConfig() *Config + func (c *Config) CalculateBufferSize() int + func (c *Config) WithAudio(audio AudioConfig) *Config + func (c *Config) WithAuth(auth AuthConfig) *Config + func (c *Config) WithBuffer(buffer BufferConfig) *Config + func (c *Config) WithRequest(request RequestConfig) *Config + func (c *Config) WithURL(url string) *Config + func (c *Config) WithUser(user UserConfig) *Config + type ConfigReader struct + func NewConfigReader(config map[string]interface{}) *ConfigReader + func (r *ConfigReader) Int(keys ...interface{}) int + func (r *ConfigReader) String(keysAndDefault ...string) string + type CorpusConfig struct + BoostingTableName string + Context string + CorrectTableName string + type CorpusMeta struct + BoostingTableName string + Context string + CorrectTableName string + type Data struct + AppId int + AppKey string + CuId string + DevPid int + Format string + LmId int + Sample int + type DeepgramASR struct + EndTime uint32 + Sentence string + func (dg *DeepgramASR) Close(cr *interfacesv1.CloseResponse) error + func (dg *DeepgramASR) Error(er *interfacesv1.ErrorResponse) error + func (dg *DeepgramASR) Message(mr *interfacesv1.MessageResponse) error + func (dg *DeepgramASR) Metadata(md *interfacesv1.MetadataResponse) error + func (dg *DeepgramASR) Open(or *interfacesv1.OpenResponse) error + func (dg *DeepgramASR) SpeechStarted(ssr *interfacesv1.SpeechStartedResponse) error + func (dg *DeepgramASR) UnhandledEvent(byData []byte) error + func (dg *DeepgramASR) UtteranceEnd(ur *interfacesv1.UtteranceEndResponse) error + type DeepgramASROption struct + ApiKey string + Channels int + Encoding string + KeepAliveDuration string + Language string + Model string + ReqChanSize int + SampleRate int + func NewDeepgramASROption(apiKey string, model string, language string) DeepgramASROption + func (opt *DeepgramASROption) GetVendor() Vendor + type DefaultTranscriberFactory struct + func GetGlobalFactory() *DefaultTranscriberFactory + func NewTranscriberFactory() *DefaultTranscriberFactory + func (f *DefaultTranscriberFactory) CreateTranscriber(config TranscriberConfig) (SpeechRecognitionEngine, error) + func (f *DefaultTranscriberFactory) GetSupportedVendors() []Vendor + func (f *DefaultTranscriberFactory) IsVendorSupported(vendor Vendor) bool + func (f *DefaultTranscriberFactory) RegisterCreator(vendor Vendor, ...) + type Event struct + Header FunHeader + Payload FunPayload + type FunASRCallback struct + func NewFunASR(opt FunASROption) FunASRCallback + func (fun *FunASRCallback) Activity() bool + func (fun *FunASRCallback) ConnAndReceive(dialogID string) error + func (fun *FunASRCallback) Init(tr SpeechRecognitionResult, er RecognitionError) + func (fun *FunASRCallback) RestartClient() + func (fun *FunASRCallback) SendAudioBytes(data []byte) error + func (fun *FunASRCallback) SendEnd() error + func (fun *FunASRCallback) StopConn() error + func (fun *FunASRCallback) Vendor() string + type FunASRClient struct + type FunASRMessage struct + IsFinal bool + Mode string + Text string + type FunASROption struct + AudioFs int + ChunkInterval int + ChunkSize []int + DecoderChunkLookBack int + EncoderChunkLookBack int + Hotwords string + IsSpeaking bool + Itn bool + Mode string + ReqChanSize int + Url string + WavFormat string + WavName string + func NewFunASROption(url string) FunASROption + func (opt *FunASROption) GetVendor() Vendor + type FunASRRequestOption struct + AudioFs int + ChunkInterval int + ChunkSize []int + DecoderChunkLookBack int + EncoderChunkLookBack int + Hotwords string + IsSpeaking bool + Itn bool + Mode string + WavFormat string + WavName string + func NewFunASRRequestOption(opt FunASROption) FunASRRequestOption + type FunAsrRealtime struct + Handler media.MediaHandler + func NewFunAsrRealtime(opt FunAsrRealtimeOption) FunAsrRealtime + func (fun *FunAsrRealtime) Activity() bool + func (fun *FunAsrRealtime) ConnAndReceive(dialogID string) error + func (fun *FunAsrRealtime) Init(tr SpeechRecognitionResult, er RecognitionError) + func (fun *FunAsrRealtime) RestartClient() + func (fun *FunAsrRealtime) SendAudioBytes(data []byte) error + func (fun *FunAsrRealtime) SendEnd() error + func (fun *FunAsrRealtime) StopConn() error + func (fun *FunAsrRealtime) Vendor() string + type FunAsrRealtimeClient struct + type FunAsrRealtimeOption struct + ApiKey string + DisfluencyRemovalEnabled bool + EnableITN bool + EnableWords bool + Format string + Heartbeat bool + LanguageHints string + MaxSentenceSilence uint + Model string + SampleRate int + Url string + func (opt *FunAsrRealtimeOption) GetVendor() Vendor + type FunHeader struct + Action string + Attributes map[string]interface{} + ErrorCode string + ErrorMessage string + Event string + Streaming string + TaskID string + type FunPayload struct + Function string + Input Input + Model string + Output Output + Parameters Params + Task string + TaskGroup string + Usage ... + type GladiaASR struct + Sentence string + func NewGladiaASR(opt GladiaASROption) GladiaASR + func (gla *GladiaASR) Activity() bool + func (gla *GladiaASR) ConnAndReceive(dialogID string) error + func (gla *GladiaASR) Init(tr SpeechRecognitionResult, er RecognitionError) + func (gla *GladiaASR) RestartClient() + func (gla *GladiaASR) SendAudioBytes(data []byte) error + func (gla *GladiaASR) SendEnd() error + func (gla *GladiaASR) StopConn() error + func (gla *GladiaASR) Vendor() string + type GladiaASROption struct + ApiKey string + Encoding string + ReqChanSize int + Url string + func NewGladiaASROption(apiKey string, encoding string) GladiaASROption + func (opt *GladiaASROption) GetVendor() Vendor + type GladiaUtterance struct + Confidence float64 + ID int + Language string + Stable bool + TimeBegin float64 + TimeEnd float64 + Transcription string + type GoogleASR struct + Sentence string + func NewGoogleASR(opt GoogleASROption) GoogleASR + func (google *GoogleASR) Activity() bool + func (google *GoogleASR) ConnAndReceive(dialogID string) error + func (google *GoogleASR) Init(tr SpeechRecognitionResult, er RecognitionError) + func (google *GoogleASR) RestartClient() + func (google *GoogleASR) SendAudioBytes(data []byte) error + func (google *GoogleASR) SendEnd() error + func (google *GoogleASR) StopConn() error + func (google *GoogleASR) Vendor() string + type GoogleASROption struct + Encoding speechpb.RecognitionConfig_AudioEncoding + LanguageCode string + ReqChainSize int + SampleRateHertz int32 + func NewGoogleASROption(encoding speechpb.RecognitionConfig_AudioEncoding, sampleRateHertz int32, ...) GoogleASROption + func (opt *GoogleASROption) GetVendor() Vendor + type HotWord struct + Weight int + Word string + type Input struct + type LocalASRConfig struct + BitDepth int + BufferSize int + Channels int + Command string + EnableVAD bool + Language string + ModelPath string + Provider LocalASRProvider + SampleRate int + VADThreshold float32 + func NewLocalASRConfig(provider LocalASRProvider, modelPath string) *LocalASRConfig + func (opt *LocalASRConfig) GetVendor() Vendor + type LocalASRProvider string + const LocalASRProviderLocal + const LocalASRProviderWhisperCpp + type LocalASRService struct + func NewLocalASRService(config *LocalASRConfig) (*LocalASRService, error) + func (s *LocalASRService) Activity() bool + func (s *LocalASRService) Close() error + func (s *LocalASRService) ConnAndReceive(dialogId string) error + func (s *LocalASRService) Init(tr SpeechRecognitionResult, er RecognitionError) + func (s *LocalASRService) RestartClient() + func (s *LocalASRService) SendAudioBytes(data []byte) error + func (s *LocalASRService) SendEnd() error + func (s *LocalASRService) StopConn() error + func (s *LocalASRService) Vendor() string + type MessageType byte + type MessageTypeSpecificFlags byte + type Output struct + Sentence struct{ ... } + type Params struct + DisfluencyRemovalEnabled bool + Format string + Heartbeat bool + LanguageHints []string + MaxSentenceSilence uint + SampleRate int + VocabularyID string + type ProtocolHeader struct + func NewDefaultHeader() *ProtocolHeader + func (h *ProtocolHeader) Serialize() []byte + func (h *ProtocolHeader) SetCompressionType(compType CompressionType) *ProtocolHeader + func (h *ProtocolHeader) SetMessageType(msgType MessageType) *ProtocolHeader + func (h *ProtocolHeader) SetMessageTypeFlags(flags MessageTypeSpecificFlags) *ProtocolHeader + func (h *ProtocolHeader) SetReservedData(data []byte) *ProtocolHeader + func (h *ProtocolHeader) SetSerializationType(serType SerializationType) *ProtocolHeader + type ProtocolVersion byte + type QCloudASR struct + Handler media.MediaHandler + func NewQcloudASR(opt QCloudASROption) *QCloudASR + func (asq *QCloudASR) Activity() bool + func (asq *QCloudASR) ConnAndReceive(dialogID string) error + func (asq *QCloudASR) Init(tr SpeechRecognitionResult, er RecognitionError) + func (asq *QCloudASR) OnFail(response *asr.SpeechRecognitionResponse, err error) + func (asq *QCloudASR) OnRecognitionComplete(response *asr.SpeechRecognitionResponse) + func (asq *QCloudASR) OnRecognitionResultChange(response *asr.SpeechRecognitionResponse) + func (asq *QCloudASR) OnRecognitionStart(response *asr.SpeechRecognitionResponse) + func (asq *QCloudASR) OnSentenceBegin(response *asr.SpeechRecognitionResponse) + func (asq *QCloudASR) OnSentenceEnd(response *asr.SpeechRecognitionResponse) + func (asq *QCloudASR) RestartClient() + func (asq *QCloudASR) SendAudioBytes(data []byte) error + func (asq *QCloudASR) SendEnd() error + func (asq *QCloudASR) StopConn() error + func (asq *QCloudASR) Vendor() string + type QCloudASROption struct + AppID string + Format int + HotWords []HotWord + ModelType string + ReqChanSize int + SecretID string + SecretKey string + func NewQcloudASROption(appId string, secretId string, secretKey string) QCloudASROption + func (opt *QCloudASROption) GetVendor() Vendor + func (opt QCloudASROption) String() string + type RecognitionError func(err error, isFatal bool) + type Recognizer struct + func NewRecognizer(config *Config) *Recognizer + func (r *Recognizer) GetTraceID() string + func (r *Recognizer) OnError(callback onErrorFunc) + func (r *Recognizer) OnResult(callback onResultFunc) + func (r *Recognizer) SendAudioFrame(frame []byte, end bool) error + func (r *Recognizer) Start() error + func (r *Recognizer) Stop() + type RequestConfig struct + Corpus CorpusConfig + EnableDDC bool + EnableITN bool + EnableNonstream bool + EnablePUNC bool + ModelName string + ShowUtterances bool + type RequestMeta struct + Corpus CorpusMeta + EnableDDC bool + EnableITN bool + EnableNonstream bool + EnablePUNC bool + ModelName string + ShowUtterances bool + type RequestPayload struct + Audio AudioMeta + Request RequestMeta + User UserMeta + type Response struct + Code int + Err error + Event int + IsLastPackage bool + PayloadMsg *ResponsePayload + PayloadSequence int32 + PayloadSize int + func ParseResponse(msg []byte) *Response + type ResponsePayload struct + AudioInfo struct{ ... } + Error string + Result struct{ ... } + type Result struct + Error error + IsFinal bool + Text string + Timestamp time.Time + type ResultCallback func(*Result) + type RingBuffer struct + func NewRingBuffer(size int) *RingBuffer + func (r *RingBuffer) Read(n int) []byte + func (r *RingBuffer) Write(data []byte) int + type SerializationType byte + type SpeechRecognitionEngine interface + Activity func() bool + ConnAndReceive func(dialogId string) error + Init func(resultCallback SpeechRecognitionResult, errorCallback RecognitionError) + RestartClient func() + SendAudioBytes func(data []byte) error + SendEnd func() error + StopConn func() error + Vendor func() string + type SpeechRecognitionResult func(text string, isLast bool, duration time.Duration, uuid string) + type TimeoutConfig struct + Read time.Duration + Send time.Duration + type TranscribeOneShot struct + Padding int + type TranscribeOption struct + AsrOptions map[string]any + Direction string + FuzzyOptions AsrCorrectorOption + type TranscriberConfig interface + GetVendor func() Vendor + func NewTranscriberConfigFromMap(provider string, config map[string]interface{}, language string) (TranscriberConfig, error) + type TranscriberFactory interface + CreateTranscriber func(config TranscriberConfig) (SpeechRecognitionEngine, error) + GetSupportedVendors func() []Vendor + IsVendorSupported func(vendor Vendor) bool + type Transcript struct + Code string + Confidence float64 + Duration float64 + Event string + InferenceTime float64 + Language string + Message string + RequestID string + TimeBegin float64 + TimeEnd float64 + Transcription string + Type string + Utterances []GladiaUtterance + type UserConfig struct + APPVersion string + DID string + Platform string + SDKVersion string + UID string + type UserMeta struct + APPVersion string + DID string + Platform string + SDKVersion string + UID string + type Utterance struct + Definite bool + EndTime int + Language string + StartTime int + Text string + Words []Word + type Vendor string + const VendorAWS + const VendorAliyun + const VendorBaidu + const VendorDeepgram + const VendorFunASR + const VendorFunASRRealtime + const VendorGladia + const VendorGoogle + const VendorLocal + const VendorQCloud + const VendorVoiceAPI + const VendorVolcengine + const VendorVolcengineLLM + const VendorWhisper + const VendorXfyunMul + func GetVendor(provider string) Vendor + type VoiceapiASR struct + Sentence string + type VoiceapiASROption struct + ReqChanSize int + Url string + func NewVoiceapiASROption(url string) VoiceapiASROption + func (opt *VoiceapiASROption) GetVendor() Vendor + type VoiceapiResponse struct + Finished bool + Idx int + Text string + type VolcEngineResponse struct + Code int + Message string + Reqid string + Results []VolcengineResult + Sequence int + type Volcengine struct + Sentence string + type VolcengineClient struct + func (c *VolcengineClient) String() string + type VolcengineLLMASR struct + func NewVolcengineLLM(opt VolcengineLLMOption) VolcengineLLMASR + func (v *VolcengineLLMASR) Activity() bool + func (v *VolcengineLLMASR) ConnAndReceive(dialogID string) error + func (v *VolcengineLLMASR) Init(tr SpeechRecognitionResult, er RecognitionError) + func (v *VolcengineLLMASR) RestartClient() + func (v *VolcengineLLMASR) SendAudioBytes(data []byte) error + func (v *VolcengineLLMASR) SendEnd() error + func (v *VolcengineLLMASR) StopConn() error + func (v *VolcengineLLMASR) Vendor() string + type VolcengineLLMOption struct + AccessToken string + AppID string + BitDepth int + Channel int + Codec string + Format string + HotWords []HotWord + ReqChanSize int + ResourceId string + SampleRate int + Url string + func NewVolcengineLLMOption(token, appID string) VolcengineLLMOption + func (opt *VolcengineLLMOption) GetVendor() Vendor + type VolcengineOption struct + AppID string + Cluster string + Codec string + Format string + ReqChanSize int + Token string + Url string + WorkFlow string + func NewVolcengineOption(appId string, token string, cluster string, format string) VolcengineOption + func (opt *VolcengineOption) GetVendor() Vendor + type VolcengineResult struct + Confidence int + Language string + Text string + Utterances []Utterance + type WAVHeader struct + AudioFormat uint16 + BitsPerSample uint16 + BlockAlign uint16 + ByteRate uint32 + ChunkID [4]byte + ChunkSize uint32 + Format [4]byte + NumChannels uint16 + SampleRate uint32 + Subchunk1ID [4]byte + Subchunk1Size uint32 + Subchunk2ID [4]byte + Subchunk2Size uint32 + type WhisperASR struct + EndTime uint32 + Sentence string + type WhisperASROption struct + Model string + ReqChanSize int + Url string + func NewWhisperASROption(url, model string) WhisperASROption + func (opt *WhisperASROption) GetVendor() Vendor + type WhisperResult struct + IsFinal bool + Text string + type Word struct + BlankDuration int + EndTime int + Pronounce string + StartTime int + Text string