Documentation
¶
Index ¶
- type FallbackAdapter
- func (f *FallbackAdapter) Capabilities() STTCapabilities
- func (f *FallbackAdapter) Label() string
- func (f *FallbackAdapter) Recognize(ctx context.Context, frames []*model.AudioFrame, language string) (*SpeechEvent, error)
- func (f *FallbackAdapter) Stream(ctx context.Context, language string) (RecognizeStream, error)
- type MultiSpeakerAdapter
- func (a *MultiSpeakerAdapter) Capabilities() STTCapabilities
- func (a *MultiSpeakerAdapter) Label() string
- func (a *MultiSpeakerAdapter) Recognize(ctx context.Context, frames []*model.AudioFrame, language string) (*SpeechEvent, error)
- func (a *MultiSpeakerAdapter) Stream(ctx context.Context, language string) (RecognizeStream, error)
- type PrimarySpeakerDetectionOptions
- type RecognizeStream
- type STT
- type STTCapabilities
- type SearchStream
- type SpeechData
- type SpeechEvent
- type SpeechEventType
- type StreamAdapter
- func (a *StreamAdapter) Capabilities() STTCapabilities
- func (a *StreamAdapter) Label() string
- func (a *StreamAdapter) Recognize(ctx context.Context, frames []*model.AudioFrame, language string) (*SpeechEvent, error)
- func (a *StreamAdapter) Stream(ctx context.Context, language string) (RecognizeStream, error)
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type FallbackAdapter ¶
type FallbackAdapter struct {
// contains filtered or unexported fields
}
func NewFallbackAdapter ¶
func NewFallbackAdapter(stts []STT) *FallbackAdapter
func (*FallbackAdapter) Capabilities ¶
func (f *FallbackAdapter) Capabilities() STTCapabilities
func (*FallbackAdapter) Label ¶
func (f *FallbackAdapter) Label() string
func (*FallbackAdapter) Recognize ¶
func (f *FallbackAdapter) Recognize(ctx context.Context, frames []*model.AudioFrame, language string) (*SpeechEvent, error)
func (*FallbackAdapter) Stream ¶
func (f *FallbackAdapter) Stream(ctx context.Context, language string) (RecognizeStream, error)
type MultiSpeakerAdapter ¶
type MultiSpeakerAdapter struct {
// contains filtered or unexported fields
}
func NewMultiSpeakerAdapter ¶
func NewMultiSpeakerAdapter(stt STT, detectPrimary bool, suppressBackground bool, primaryFormat string, backgroundFormat string, opt *PrimarySpeakerDetectionOptions) (*MultiSpeakerAdapter, error)
func (*MultiSpeakerAdapter) Capabilities ¶
func (a *MultiSpeakerAdapter) Capabilities() STTCapabilities
func (*MultiSpeakerAdapter) Label ¶
func (a *MultiSpeakerAdapter) Label() string
func (*MultiSpeakerAdapter) Recognize ¶
func (a *MultiSpeakerAdapter) Recognize(ctx context.Context, frames []*model.AudioFrame, language string) (*SpeechEvent, error)
func (*MultiSpeakerAdapter) Stream ¶
func (a *MultiSpeakerAdapter) Stream(ctx context.Context, language string) (RecognizeStream, error)
type PrimarySpeakerDetectionOptions ¶
type PrimarySpeakerDetectionOptions struct {
FrameSizeMs int
RMSBufferDuration float64
MinRMSSamples int
RMSSmoothingFactor float64
ThresholdMultiplier float64
DecayToEqualTime float64
ThresholdMinMultiplier float64
}
func DefaultPrimarySpeakerDetectionOptions ¶
func DefaultPrimarySpeakerDetectionOptions() PrimarySpeakerDetectionOptions
type RecognizeStream ¶
type RecognizeStream interface {
PushFrame(frame *model.AudioFrame) error
Flush() error
Close() error
Next() (*SpeechEvent, error)
}
type STT ¶
type STT interface {
Label() string
Capabilities() STTCapabilities
Stream(ctx context.Context, language string) (RecognizeStream, error)
Recognize(ctx context.Context, frames []*model.AudioFrame, language string) (*SpeechEvent, error)
}
type STTCapabilities ¶
type SearchStream ¶
type SearchStream interface {
PushFrame(frame *model.AudioFrame) error
Close() error
Next() (*SpeechEvent, error)
}
type SpeechData ¶
type SpeechEvent ¶
type SpeechEvent struct {
Type SpeechEventType
RequestID string
Alternatives []SpeechData
Interrupted bool
}
type SpeechEventType ¶
type SpeechEventType string
const (
	SpeechEventStartOfSpeech       SpeechEventType = "start_of_speech"
	SpeechEventInterimTranscript   SpeechEventType = "interim_transcript"
	SpeechEventPreflightTranscript SpeechEventType = "preflight_transcript"
	SpeechEventFinalTranscript     SpeechEventType = "final_transcript"
	SpeechEventRecognitionUsage    SpeechEventType = "recognition_usage"
	SpeechEventEndOfSpeech         SpeechEventType = "end_of_speech"
)
type StreamAdapter ¶
type StreamAdapter struct {
// contains filtered or unexported fields
}
StreamAdapter converts a non-streaming STT into a streaming STT by coupling it with a VAD. It buffers audio frames and sends them to the underlying STT's Recognize method when the VAD detects speech.
func NewStreamAdapter ¶
func NewStreamAdapter(stt STT, vad vad.VAD) *StreamAdapter
func (*StreamAdapter) Capabilities ¶
func (a *StreamAdapter) Capabilities() STTCapabilities
func (*StreamAdapter) Label ¶
func (a *StreamAdapter) Label() string
func (*StreamAdapter) Recognize ¶
func (a *StreamAdapter) Recognize(ctx context.Context, frames []*model.AudioFrame, language string) (*SpeechEvent, error)
func (*StreamAdapter) Stream ¶
func (a *StreamAdapter) Stream(ctx context.Context, language string) (RecognizeStream, error)
Click to show internal directories.
Click to hide internal directories.