Documentation
¶
Index ¶
- type GoogleSTTProvider
- type HuggingFaceProvider
- type LocalProvider
- func (p *LocalProvider) Health(ctx context.Context) error
- func (p *LocalProvider) IsReady() bool
- func (p *LocalProvider) Name() string
- func (p *LocalProvider) StartServer(ctx context.Context) error
- func (p *LocalProvider) StopServer()
- func (p *LocalProvider) Transcribe(ctx context.Context, audio []byte, opts TranscribeOpts) (*Result, error)
- type OpenAICompatibleProvider
- type Result
- type STTProvider
- type TranscribeOpts
- type VPSProvider
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type GoogleSTTProvider ¶
type GoogleSTTProvider struct {
APIKey string
Model string // "chirp_3", "chirp_2", "latest_long"
BaseURL string // Override for testing; defaults to googleSTTBaseURL
// contains filtered or unexported fields
}
GoogleSTTProvider implements STTProvider for the Google Cloud Speech-to-Text v1 REST API.
func NewGoogleSTTProvider ¶
func NewGoogleSTTProvider(apiKey, model string) *GoogleSTTProvider
NewGoogleSTTProvider creates a provider for Google Cloud Speech-to-Text. Model defaults to "chirp_3" if empty.
func (*GoogleSTTProvider) Health ¶
func (p *GoogleSTTProvider) Health(ctx context.Context) error
Health checks if the Google Speech API is reachable.
func (*GoogleSTTProvider) Name ¶
func (p *GoogleSTTProvider) Name() string
Name returns the provider identifier.
func (*GoogleSTTProvider) Transcribe ¶
func (p *GoogleSTTProvider) Transcribe(ctx context.Context, audio []byte, opts TranscribeOpts) (*Result, error)
Transcribe sends audio to the Google Cloud Speech-to-Text v1 REST API.
type HuggingFaceProvider ¶
type HuggingFaceProvider struct {
Model string
Token string
BaseURL string // Override for testing; defaults to hfBaseURL
// contains filtered or unexported fields
}
HuggingFaceProvider implements STTProvider for Tier 3: HuggingFace Inference API.
func NewHuggingFaceProvider ¶
func NewHuggingFaceProvider(model, token string) *HuggingFaceProvider
func (*HuggingFaceProvider) Health ¶
func (p *HuggingFaceProvider) Health(ctx context.Context) error
func (*HuggingFaceProvider) Name ¶
func (p *HuggingFaceProvider) Name() string
func (*HuggingFaceProvider) Transcribe ¶
func (p *HuggingFaceProvider) Transcribe(ctx context.Context, audio []byte, opts TranscribeOpts) (*Result, error)
type LocalProvider ¶
type LocalProvider struct {
BaseURL string // e.g. "http://127.0.0.1:8080"
Port int
ModelPath string
GPU string
// contains filtered or unexported fields
}
LocalProvider implements STTProvider for Tier 1: localhost whisper.cpp server.
func NewLocalProvider ¶
func NewLocalProvider(port int, modelPath, gpu string) *LocalProvider
func (*LocalProvider) IsReady ¶
func (p *LocalProvider) IsReady() bool
IsReady returns true if the whisper-server subprocess is running and responding.
func (*LocalProvider) Name ¶
func (p *LocalProvider) Name() string
func (*LocalProvider) StartServer ¶
func (p *LocalProvider) StartServer(ctx context.Context) error
StartServer starts the whisper.cpp server subprocess. It blocks until the server is ready or the context is cancelled.
func (*LocalProvider) StopServer ¶
func (p *LocalProvider) StopServer()
StopServer terminates the whisper-server subprocess.
func (*LocalProvider) Transcribe ¶
func (p *LocalProvider) Transcribe(ctx context.Context, audio []byte, opts TranscribeOpts) (*Result, error)
type OpenAICompatibleProvider ¶
type OpenAICompatibleProvider struct {
BaseURL string
APIKey string
Model string
// contains filtered or unexported fields
}
OpenAICompatibleProvider implements STTProvider for any endpoint speaking the OpenAI /v1/audio/transcriptions API (OpenAI, Groq, VPS whisper-server, etc.).
func NewGroqSTTProvider ¶
func NewGroqSTTProvider(apiKey string) *OpenAICompatibleProvider
NewGroqSTTProvider creates a provider for the Groq Whisper API.
func NewOpenAICompatibleProvider ¶
func NewOpenAICompatibleProvider(name, baseURL, apiKey, model string) *OpenAICompatibleProvider
NewOpenAICompatibleProvider creates a provider for any OpenAI-compatible STT endpoint.
func NewOpenAISTTProvider ¶
func NewOpenAISTTProvider(apiKey string) *OpenAICompatibleProvider
NewOpenAISTTProvider creates a provider for the OpenAI Whisper API.
func NewVPSProvider ¶
func NewVPSProvider(baseURL, apiKey string) *OpenAICompatibleProvider
NewVPSProvider creates a provider for a self-hosted whisper-server.
func (*OpenAICompatibleProvider) Health ¶
func (p *OpenAICompatibleProvider) Health(ctx context.Context) error
Health checks provider reachability. Tries GET /health first (whisper-server), then falls back to GET /v1/models (OpenAI, Groq).
func (*OpenAICompatibleProvider) Name ¶
func (p *OpenAICompatibleProvider) Name() string
Name returns the provider identifier.
func (*OpenAICompatibleProvider) Transcribe ¶
func (p *OpenAICompatibleProvider) Transcribe(ctx context.Context, audio []byte, opts TranscribeOpts) (*Result, error)
Transcribe sends audio to the OpenAI-compatible /v1/audio/transcriptions endpoint.
type Result ¶
type Result struct {
Text string
Language string
Duration time.Duration
Provider string
Model string
Confidence float64 // If available from the provider
}
Result holds the output of a transcription.
type STTProvider ¶
type STTProvider interface {
// Transcribe sends audio data to the STT backend and returns the transcription.
Transcribe(ctx context.Context, audio []byte, opts TranscribeOpts) (*Result, error)
// Name returns the provider identifier (e.g. "local", "vps", "huggingface").
Name() string
// Health checks if the provider is reachable and ready.
Health(ctx context.Context) error
}
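STTProvider defines the interface for all speech-to-text backends. Implementations are not tied to a single wire protocol: OpenAICompatibleProvider speaks the OpenAI-compatible /v1/audio/transcriptions API, while GoogleSTTProvider and HuggingFaceProvider use the Google Cloud Speech-to-Text REST API and the HuggingFace Inference API, respectively.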
type TranscribeOpts ¶
type TranscribeOpts struct {
Language string // "de", "en", "auto"
Model string // Optional: model override
}
TranscribeOpts configures a single transcription request.
type VPSProvider ¶
type VPSProvider = OpenAICompatibleProvider
VPSProvider is an alias for backward compatibility. Use OpenAICompatibleProvider directly for new code.