Documentation
¶
Overview ¶
Package vad provides a unified voice activity detection (VAD) interface that supports multiple providers (HTTP, WebSocket), along with session management and health checks.
Supported Providers:
- HTTP: Traditional HTTP-based VAD service
- WebSocket: Real-time WebSocket-based VAD service
Usage:
// Create factory
factory := vad.NewDefaultFactory(logger)
// Configure, then create the detector and session manager
config := &vad.Config{
Provider: vad.ProviderHTTP,
BaseURL: "http://localhost:8080",
Timeout: 10 * time.Second,
SessionTTL: 5 * time.Minute,
}
detector, manager, err := factory.CreateDetectorAndManager(config)
if err != nil {
log.Fatal(err)
}
defer detector.Close()
defer manager.Close()
// Process audio
result, err := manager.ProcessAudio(ctx, sessionID, audioData, "pcm")
if err != nil {
log.Fatal(err)
}
if result.HaveVoice {
// Handle voice activity
}
Index ¶
- type Config
- type DefaultFactory
- func (f *DefaultFactory) CreateDetector(config *Config) (Detector, error)
- func (f *DefaultFactory) CreateDetectorAndManager(config *Config) (Detector, SessionManager, error)
- func (f *DefaultFactory) CreateSessionManager(detector Detector, config *Config) (SessionManager, error)
- func (f *DefaultFactory) GetSupportedProviders() []Provider
- func (f *DefaultFactory) ValidateConfig(config *Config) error
- type DefaultSessionManager
- func (sm *DefaultSessionManager) Close() error
- func (sm *DefaultSessionManager) DeleteSession(ctx context.Context, sessionID string) error
- func (sm *DefaultSessionManager) GetOrCreateSession(sessionID string) *Session
- func (sm *DefaultSessionManager) GetSession(sessionID string) *Session
- func (sm *DefaultSessionManager) GetStats() map[string]interface{}
- func (sm *DefaultSessionManager) ListSessions() []string
- func (sm *DefaultSessionManager) ProcessAudio(ctx context.Context, sessionID string, audioData []byte, format string, ...) (*DetectResponse, error)
- func (sm *DefaultSessionManager) ResetSession(ctx context.Context, sessionID string) error
- func (sm *DefaultSessionManager) SetTTL(ttl time.Duration)
- type DetectRequest
- type DetectResponse
- type Detector
- type DetectorOptions
- type Factory
- type HTTPDetector
- func (d *HTTPDetector) Close() error
- func (d *HTTPDetector) Detect(ctx context.Context, req *DetectRequest) (*DetectResponse, error)
- func (d *HTTPDetector) HealthCheck(ctx context.Context) error
- func (d *HTTPDetector) Provider() Provider
- func (d *HTTPDetector) ResetSession(ctx context.Context, sessionID string) error
- func (d *HTTPDetector) SetTimeout(timeout time.Duration)
- type HealthResponse
- type Provider
- type RMSDetector
- type Session
- type SessionManager
- type SessionManagerOptions
- type WebSocketDetector
- func (d *WebSocketDetector) Close() error
- func (d *WebSocketDetector) Detect(ctx context.Context, req *DetectRequest) (*DetectResponse, error)
- func (d *WebSocketDetector) HealthCheck(ctx context.Context) error
- func (d *WebSocketDetector) IsConnected() bool
- func (d *WebSocketDetector) Provider() Provider
- func (d *WebSocketDetector) ResetSession(ctx context.Context, sessionID string) error
- type WebSocketMessage
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type Config ¶
type Config struct {
Provider Provider `json:"provider"` // 提供商
BaseURL string `json:"base_url"` // 基础 URL(HTTP 提供商)
Timeout time.Duration `json:"timeout"` // 超时时间
SessionTTL time.Duration `json:"session_ttl"` // 会话过期时间
Options map[string]interface{} `json:"options"` // 提供商特定选项
}
Config VAD 配置
type DefaultFactory ¶
type DefaultFactory struct {
// contains filtered or unexported fields
}
DefaultFactory 默认工厂实现
func NewDefaultFactory ¶
func NewDefaultFactory(logger *zap.Logger) *DefaultFactory
NewDefaultFactory 创建新的工厂
func (*DefaultFactory) CreateDetector ¶
func (f *DefaultFactory) CreateDetector(config *Config) (Detector, error)
CreateDetector 创建检测器
func (*DefaultFactory) CreateDetectorAndManager ¶
func (f *DefaultFactory) CreateDetectorAndManager(config *Config) (Detector, SessionManager, error)
CreateDetectorAndManager 创建检测器和会话管理器
func (*DefaultFactory) CreateSessionManager ¶
func (f *DefaultFactory) CreateSessionManager(detector Detector, config *Config) (SessionManager, error)
CreateSessionManager 创建会话管理器
func (*DefaultFactory) GetSupportedProviders ¶
func (f *DefaultFactory) GetSupportedProviders() []Provider
GetSupportedProviders 获取支持的提供商列表
func (*DefaultFactory) ValidateConfig ¶
func (f *DefaultFactory) ValidateConfig(config *Config) error
ValidateConfig 验证配置
type DefaultSessionManager ¶
type DefaultSessionManager struct {
// contains filtered or unexported fields
}
DefaultSessionManager 默认会话管理器实现
func NewDefaultSessionManager ¶
func NewDefaultSessionManager(detector Detector, config *Config, logger *zap.Logger) (*DefaultSessionManager, error)
NewDefaultSessionManager 创建新的会话管理器
func (*DefaultSessionManager) DeleteSession ¶
func (sm *DefaultSessionManager) DeleteSession(ctx context.Context, sessionID string) error
DeleteSession 删除会话
func (*DefaultSessionManager) GetOrCreateSession ¶
func (sm *DefaultSessionManager) GetOrCreateSession(sessionID string) *Session
GetOrCreateSession 获取或创建会话
func (*DefaultSessionManager) GetSession ¶
func (sm *DefaultSessionManager) GetSession(sessionID string) *Session
GetSession 获取会话
func (*DefaultSessionManager) GetStats ¶
func (sm *DefaultSessionManager) GetStats() map[string]interface{}
GetStats 获取统计信息
func (*DefaultSessionManager) ListSessions ¶
func (sm *DefaultSessionManager) ListSessions() []string
ListSessions 列出所有活跃会话
func (*DefaultSessionManager) ProcessAudio ¶
func (sm *DefaultSessionManager) ProcessAudio( ctx context.Context, sessionID string, audioData []byte, format string, threshold ...float64, ) (*DetectResponse, error)
ProcessAudio 处理音频数据
func (*DefaultSessionManager) ResetSession ¶
func (sm *DefaultSessionManager) ResetSession(ctx context.Context, sessionID string) error
ResetSession 重置会话
func (*DefaultSessionManager) SetTTL ¶
func (sm *DefaultSessionManager) SetTTL(ttl time.Duration)
SetTTL 设置会话过期时间
type DetectRequest ¶
type DetectRequest struct {
AudioData []byte `json:"audio_data,omitempty"` // 音频数据
AudioFormat string `json:"audio_format"` // "pcm" 或 "opus"
SampleRate int `json:"sample_rate"` // 采样率
Channels int `json:"channels"` // 声道数
Threshold float64 `json:"threshold,omitempty"` // VAD 阈值(可选)
SessionID string `json:"session_id,omitempty"` // 会话 ID
Timestamp time.Time `json:"timestamp,omitempty"` // 时间戳
}
DetectRequest VAD 检测请求
type DetectResponse ¶
type DetectResponse struct {
HaveVoice bool `json:"have_voice"` // 是否有语音
VoiceStop bool `json:"voice_stop"` // 语音是否停止
SpeechProb float64 `json:"speech_prob,omitempty"` // 语音概率
Timestamp time.Time `json:"timestamp,omitempty"` // 响应时间戳
}
DetectResponse VAD 检测响应
type Detector ¶
type Detector interface {
// Detect 检测音频中的语音活动
Detect(ctx context.Context, req *DetectRequest) (*DetectResponse, error)
// HealthCheck 健康检查
HealthCheck(ctx context.Context) error
// Close 关闭检测器
Close() error
// Provider 返回提供商名称
Provider() Provider
}
Detector VAD 检测器接口
type DetectorOptions ¶
type DetectorOptions struct {
Timeout time.Duration
MaxRetries int
RetryBackoff time.Duration
Logger interface{} // *zap.Logger
}
DetectorOptions 检测器选项
type Factory ¶
type Factory interface {
// CreateDetector 创建检测器
CreateDetector(config *Config) (Detector, error)
// CreateSessionManager 创建会话管理器
CreateSessionManager(detector Detector, config *Config) (SessionManager, error)
}
Factory VAD 工厂接口
type HTTPDetector ¶
type HTTPDetector struct {
// contains filtered or unexported fields
}
HTTPDetector HTTP VAD 检测器
func NewHTTPDetector ¶
func NewHTTPDetector(config *Config, logger *zap.Logger) (*HTTPDetector, error)
NewHTTPDetector 创建新的 HTTP VAD 检测器
func (*HTTPDetector) Detect ¶
func (d *HTTPDetector) Detect(ctx context.Context, req *DetectRequest) (*DetectResponse, error)
Detect 检测音频中的语音活动
func (*HTTPDetector) HealthCheck ¶
func (d *HTTPDetector) HealthCheck(ctx context.Context) error
HealthCheck 健康检查
func (*HTTPDetector) ResetSession ¶
func (d *HTTPDetector) ResetSession(ctx context.Context, sessionID string) error
ResetSession 重置会话
func (*HTTPDetector) SetTimeout ¶
func (d *HTTPDetector) SetTimeout(timeout time.Duration)
SetTimeout 设置 HTTP 超时时间
type HealthResponse ¶
type HealthResponse struct {
Status string `json:"status"`
Service string `json:"service"`
Message string `json:"message,omitempty"`
}
HealthResponse 健康检查响应
type RMSDetector ¶
type RMSDetector struct {
// contains filtered or unexported fields
}
RMSDetector performs energy-based (RMS) gating suitable for barge-in while downlink synthesis plays.
func NewDetector ¶
func NewDetector() *RMSDetector
NewDetector is an alias for NewRMSDetector for backward compatibility.
func NewRMSDetector ¶
func NewRMSDetector() *RMSDetector
NewRMSDetector builds a detector with sipold-aligned defaults.
func (*RMSDetector) CheckBargeIn ¶
func (v *RMSDetector) CheckBargeIn(pcmData []byte, synthPlaying bool) bool
CheckBargeIn returns true when uplink PCM suggests the user is speaking during synthesis playback. pcmData must be 16-bit little-endian mono PCM (typically 20 ms @ 16 kHz from the sip1 decode path).
func (*RMSDetector) SetConsecutiveFrames ¶
func (v *RMSDetector) SetConsecutiveFrames(frames int)
SetConsecutiveFrames sets how many consecutive over-threshold frames trigger barge-in.
func (*RMSDetector) SetEnabled ¶
func (v *RMSDetector) SetEnabled(enabled bool)
SetEnabled turns detection on/off.
func (*RMSDetector) SetLogger ¶
func (v *RMSDetector) SetLogger(logger *logrus.Logger)
SetLogger attaches an optional logrus logger (debug/info).
func (*RMSDetector) SetThreshold ¶
func (v *RMSDetector) SetThreshold(threshold float64)
SetThreshold sets the RMS ceiling used with adaptive noise tracking.
type Session ¶
type Session struct {
ID string
CreatedAt time.Time
LastActivityAt time.Time
HaveVoice bool
VoiceStop bool
LastSpeechProb float64
Metadata map[string]interface{} // 自定义元数据
}
Session VAD 会话
type SessionManager ¶
type SessionManager interface {
// GetOrCreateSession 获取或创建会话
GetOrCreateSession(sessionID string) *Session
// ProcessAudio 处理音频数据
ProcessAudio(ctx context.Context, sessionID string, audioData []byte, format string, threshold ...float64) (*DetectResponse, error)
// GetSession 获取会话
GetSession(sessionID string) *Session
// ResetSession 重置会话
ResetSession(ctx context.Context, sessionID string) error
// DeleteSession 删除会话
DeleteSession(ctx context.Context, sessionID string) error
// ListSessions 列出所有活跃会话
ListSessions() []string
// Close 关闭会话管理器
Close() error
}
SessionManager VAD 会话管理器接口
type SessionManagerOptions ¶
type SessionManagerOptions struct {
SessionTTL time.Duration
CleanupInterval time.Duration
MaxSessions int
Logger interface{} // *zap.Logger
}
SessionManagerOptions 会话管理器选项
type WebSocketDetector ¶
type WebSocketDetector struct {
// contains filtered or unexported fields
}
WebSocketDetector WebSocket VAD 检测器
func NewWebSocketDetector ¶
func NewWebSocketDetector(config *Config, logger *zap.Logger) (*WebSocketDetector, error)
NewWebSocketDetector 创建新的 WebSocket VAD 检测器
func (*WebSocketDetector) Detect ¶
func (d *WebSocketDetector) Detect(ctx context.Context, req *DetectRequest) (*DetectResponse, error)
Detect 检测音频中的语音活动
func (*WebSocketDetector) HealthCheck ¶
func (d *WebSocketDetector) HealthCheck(ctx context.Context) error
HealthCheck 健康检查
func (*WebSocketDetector) IsConnected ¶
func (d *WebSocketDetector) IsConnected() bool
IsConnected 检查是否已连接
func (*WebSocketDetector) Provider ¶
func (d *WebSocketDetector) Provider() Provider
Provider 返回提供商名称
func (*WebSocketDetector) ResetSession ¶
func (d *WebSocketDetector) ResetSession(ctx context.Context, sessionID string) error
ResetSession 重置会话
type WebSocketMessage ¶
type WebSocketMessage struct {
Type string `json:"type"` // "audio", "reset"
Data string `json:"data,omitempty"` // Base64 编码的音频数据
Format string `json:"format,omitempty"` // "pcm" 或 "opus"
SessionID string `json:"session_id,omitempty"` // 会话 ID
Result *DetectResponse `json:"result,omitempty"` // 检测结果
Error string `json:"error,omitempty"` // 错误信息
Timestamp time.Time `json:"timestamp,omitempty"` // 时间戳
}
WebSocketMessage WebSocket 消息