stt

package
v0.0.2 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 14, 2026 License: MIT Imports: 9 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type FallbackAdapter

type FallbackAdapter struct {
	// contains filtered or unexported fields
}

func NewFallbackAdapter

func NewFallbackAdapter(stts []STT) *FallbackAdapter

func (*FallbackAdapter) Capabilities

func (f *FallbackAdapter) Capabilities() STTCapabilities

func (*FallbackAdapter) Label

func (f *FallbackAdapter) Label() string

func (*FallbackAdapter) Recognize

func (f *FallbackAdapter) Recognize(ctx context.Context, frames []*model.AudioFrame, language string) (*SpeechEvent, error)

func (*FallbackAdapter) Stream

func (f *FallbackAdapter) Stream(ctx context.Context, language string) (RecognizeStream, error)

type MultiSpeakerAdapter

type MultiSpeakerAdapter struct {
	// contains filtered or unexported fields
}

func NewMultiSpeakerAdapter

func NewMultiSpeakerAdapter(stt STT, detectPrimary bool, suppressBackground bool, primaryFormat string, backgroundFormat string, opt *PrimarySpeakerDetectionOptions) (*MultiSpeakerAdapter, error)

func (*MultiSpeakerAdapter) Capabilities

func (a *MultiSpeakerAdapter) Capabilities() STTCapabilities

func (*MultiSpeakerAdapter) Label

func (a *MultiSpeakerAdapter) Label() string

func (*MultiSpeakerAdapter) Recognize

func (a *MultiSpeakerAdapter) Recognize(ctx context.Context, frames []*model.AudioFrame, language string) (*SpeechEvent, error)

func (*MultiSpeakerAdapter) Stream

func (a *MultiSpeakerAdapter) Stream(ctx context.Context, language string) (RecognizeStream, error)

type PrimarySpeakerDetectionOptions

// PrimarySpeakerDetectionOptions groups the tuning parameters that
// NewMultiSpeakerAdapter accepts through its opt argument.
// DefaultPrimarySpeakerDetectionOptions returns a value of this type.
//
// NOTE(review): this listing shows no units, valid ranges, or default values
// for any field. The per-field notes below are inferred from the field names
// only and must be confirmed against the implementation.
type PrimarySpeakerDetectionOptions struct {
	// presumably the analysis frame size in milliseconds (per the "Ms" suffix) — TODO confirm
	FrameSizeMs            int
	// time unit not shown here (seconds?) — TODO confirm
	RMSBufferDuration      float64
	// presumably a minimum number of RMS samples required before a decision — TODO confirm
	MinRMSSamples          int
	// range and direction of smoothing not shown here — TODO confirm
	RMSSmoothingFactor     float64
	ThresholdMultiplier    float64
	// time unit not shown here (seconds?) — TODO confirm
	DecayToEqualTime       float64
	// presumably a lower bound related to ThresholdMultiplier — TODO confirm
	ThresholdMinMultiplier float64
}

func DefaultPrimarySpeakerDetectionOptions

func DefaultPrimarySpeakerDetectionOptions() PrimarySpeakerDetectionOptions

type RecognizeStream

// RecognizeStream is the streaming session type returned by STT.Stream.
// Audio is supplied one frame at a time through PushFrame and results are
// retrieved as *SpeechEvent values through Next.
//
// NOTE(review): blocking behavior, the end-of-stream signal from Next, and
// whether methods may be called concurrently are not shown in this listing —
// confirm against the implementations.
type RecognizeStream interface {
	// PushFrame supplies a single audio frame to the stream.
	PushFrame(frame *model.AudioFrame) error
	// Flush takes no arguments and reports an error; presumably it forces
	// processing of audio pushed so far — TODO confirm.
	Flush() error
	// Close ends the stream and reports any error from doing so.
	Close() error
	// Next returns the next speech event, or an error.
	Next() (*SpeechEvent, error)
}

type STT

// STT is the speech-to-text provider interface of this package. It is the
// input type of NewFallbackAdapter, NewMultiSpeakerAdapter, and
// NewStreamAdapter, and the adapters they return declare methods with these
// same four signatures.
type STT interface {
	// Label returns a string identifying the implementation.
	Label() string
	// Capabilities reports the feature flags of the implementation.
	Capabilities() STTCapabilities
	// Stream opens a streaming recognition session for the given language.
	Stream(ctx context.Context, language string) (RecognizeStream, error)
	// Recognize processes a complete slice of audio frames for the given
	// language and returns a single speech event.
	Recognize(ctx context.Context, frames []*model.AudioFrame, language string) (*SpeechEvent, error)
}

type STTCapabilities

// STTCapabilities is the set of boolean feature flags returned by the
// Capabilities method of an STT.
//
// NOTE(review): the exact meaning of each flag is not documented in this
// listing; the notes below are inferred from the names — confirm.
type STTCapabilities struct {
	// presumably true when Stream is natively supported — TODO confirm
	Streaming        bool
	// presumably true when interim (non-final) transcripts are emitted — TODO confirm
	InterimResults   bool
	// presumably true when speakers are distinguished (see SpeechData.SpeakerID) — TODO confirm
	Diarization      bool
	// presumably true when Recognize on a complete frame slice is supported — TODO confirm
	OfflineRecognize bool
}

type SearchStream

// SearchStream declares the same PushFrame, Close, and Next methods as
// RecognizeStream but has no Flush method, so every RecognizeStream also
// satisfies SearchStream.
//
// NOTE(review): no function or method visible in this listing accepts or
// returns SearchStream; its intended use cannot be determined from here.
type SearchStream interface {
	// PushFrame supplies a single audio frame to the stream.
	PushFrame(frame *model.AudioFrame) error
	// Close ends the stream and reports any error from doing so.
	Close() error
	// Next returns the next speech event, or an error.
	Next() (*SpeechEvent, error)
}

type SpeechData

// SpeechData is one recognition result. SpeechEvent.Alternatives holds a
// slice of these values.
type SpeechData struct {
	// Language of the result; the format (e.g. language tag style) is not shown here.
	Language         string
	// Text is the recognized text.
	Text             string
	// StartTime and EndTime bound the result in time.
	// NOTE(review): unit and reference point are not shown (seconds from stream start?) — confirm.
	StartTime        float64
	EndTime          float64
	// Confidence of the result; the value range is not shown here — TODO confirm.
	Confidence       float64
	// SpeakerID identifies the speaker; presumably empty when unknown — TODO confirm.
	SpeakerID        string
	// IsPrimarySpeaker is a pointer, so nil is distinguishable from false.
	// Presumably nil means primary-speaker detection was not applied — TODO confirm.
	IsPrimarySpeaker *bool
}

type SpeechEvent

// SpeechEvent is the result type returned by STT.Recognize and by the Next
// method of RecognizeStream and SearchStream.
type SpeechEvent struct {
	// Type is the kind of event; see the SpeechEventType constants.
	Type         SpeechEventType
	// RequestID identifies the request; who assigns it is not shown here.
	RequestID    string
	// Alternatives holds the recognition results carried by this event.
	// NOTE(review): ordering (e.g. best first) is not shown here — confirm.
	Alternatives []SpeechData
	// NOTE(review): the condition that sets Interrupted is not shown here — confirm.
	Interrupted  bool
}

type SpeechEventType

// SpeechEventType is the string-backed kind of a SpeechEvent, stored in
// SpeechEvent.Type.
type SpeechEventType string

// Known SpeechEventType values. Each constant's underlying string is the
// snake_case form of the part of its name after the "SpeechEvent" prefix.
//
// NOTE(review): when each event is emitted, and the difference between the
// interim, preflight, and final transcript events, is not shown in this
// listing — confirm against the implementations.
const (
	SpeechEventStartOfSpeech       SpeechEventType = "start_of_speech"
	SpeechEventInterimTranscript   SpeechEventType = "interim_transcript"
	SpeechEventPreflightTranscript SpeechEventType = "preflight_transcript"
	SpeechEventFinalTranscript     SpeechEventType = "final_transcript"
	SpeechEventRecognitionUsage    SpeechEventType = "recognition_usage"
	SpeechEventEndOfSpeech         SpeechEventType = "end_of_speech"
)

type StreamAdapter

type StreamAdapter struct {
	// contains filtered or unexported fields
}

StreamAdapter converts a non-streaming STT into a streaming STT by coupling it with a voice activity detector (VAD). It buffers audio frames and sends them to the underlying STT's Recognize method when the VAD detects speech.

func NewStreamAdapter

func NewStreamAdapter(stt STT, vad vad.VAD) *StreamAdapter

func (*StreamAdapter) Capabilities

func (a *StreamAdapter) Capabilities() STTCapabilities

func (*StreamAdapter) Label

func (a *StreamAdapter) Label() string

func (*StreamAdapter) Recognize

func (a *StreamAdapter) Recognize(ctx context.Context, frames []*model.AudioFrame, language string) (*SpeechEvent, error)

func (*StreamAdapter) Stream

func (a *StreamAdapter) Stream(ctx context.Context, language string) (RecognizeStream, error)

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL