vad

package
v1.4.2 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jun 8, 2026 License: MIT Imports: 13 Imported by: 0

Documentation

Overview

Package vad provides a unified voice activity detection (VAD) interface that supports multiple providers (HTTP, WebSocket), with session management and health checks.

Supported Providers:

  • HTTP: Traditional HTTP-based VAD service
  • WebSocket: Real-time WebSocket-based VAD service

Usage:

// Create factory
factory := vad.NewDefaultFactory(logger)

// Create detector and session manager
config := &vad.Config{
	Provider: vad.ProviderHTTP,
	BaseURL:  "http://localhost:8080",
	Timeout:  10 * time.Second,
	SessionTTL: 5 * time.Minute,
}

detector, manager, err := factory.CreateDetectorAndManager(config)
if err != nil {
	log.Fatal(err)
}
defer detector.Close()
defer manager.Close()

// Process audio
result, err := manager.ProcessAudio(ctx, sessionID, audioData, "pcm")
if err != nil {
	log.Fatal(err)
}

if result.HaveVoice {
	// Handle voice activity
}

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type Config

type Config struct {
	Provider   Provider               `json:"provider"`    // 提供商
	BaseURL    string                 `json:"base_url"`    // 基础 URL(HTTP 提供商)
	Timeout    time.Duration          `json:"timeout"`     // 超时时间
	SessionTTL time.Duration          `json:"session_ttl"` // 会话过期时间
	Options    map[string]interface{} `json:"options"`     // 提供商特定选项
}

Config VAD 配置

type DefaultFactory

type DefaultFactory struct {
	// contains filtered or unexported fields
}

DefaultFactory 默认工厂实现

func NewDefaultFactory

func NewDefaultFactory(logger *zap.Logger) *DefaultFactory

NewDefaultFactory 创建新的工厂

func (*DefaultFactory) CreateDetector

func (f *DefaultFactory) CreateDetector(config *Config) (Detector, error)

CreateDetector 创建检测器

func (*DefaultFactory) CreateDetectorAndManager

func (f *DefaultFactory) CreateDetectorAndManager(config *Config) (Detector, SessionManager, error)

CreateDetectorAndManager 创建检测器和会话管理器

func (*DefaultFactory) CreateSessionManager

func (f *DefaultFactory) CreateSessionManager(detector Detector, config *Config) (SessionManager, error)

CreateSessionManager 创建会话管理器

func (*DefaultFactory) GetSupportedProviders

func (f *DefaultFactory) GetSupportedProviders() []Provider

GetSupportedProviders 获取支持的提供商列表

func (*DefaultFactory) ValidateConfig

func (f *DefaultFactory) ValidateConfig(config *Config) error

ValidateConfig 验证配置

type DefaultSessionManager

type DefaultSessionManager struct {
	// contains filtered or unexported fields
}

DefaultSessionManager 默认会话管理器实现

func NewDefaultSessionManager

func NewDefaultSessionManager(detector Detector, config *Config, logger *zap.Logger) (*DefaultSessionManager, error)

NewDefaultSessionManager 创建新的会话管理器

func (*DefaultSessionManager) Close

func (sm *DefaultSessionManager) Close() error

Close 关闭会话管理器

func (*DefaultSessionManager) DeleteSession

func (sm *DefaultSessionManager) DeleteSession(ctx context.Context, sessionID string) error

DeleteSession 删除会话

func (*DefaultSessionManager) GetOrCreateSession

func (sm *DefaultSessionManager) GetOrCreateSession(sessionID string) *Session

GetOrCreateSession 获取或创建会话

func (*DefaultSessionManager) GetSession

func (sm *DefaultSessionManager) GetSession(sessionID string) *Session

GetSession 获取会话

func (*DefaultSessionManager) GetStats

func (sm *DefaultSessionManager) GetStats() map[string]interface{}

GetStats 获取统计信息

func (*DefaultSessionManager) ListSessions

func (sm *DefaultSessionManager) ListSessions() []string

ListSessions 列出所有活跃会话

func (*DefaultSessionManager) ProcessAudio

func (sm *DefaultSessionManager) ProcessAudio(
	ctx context.Context,
	sessionID string,
	audioData []byte,
	format string,
	threshold ...float64,
) (*DetectResponse, error)

ProcessAudio 处理音频数据

func (*DefaultSessionManager) ResetSession

func (sm *DefaultSessionManager) ResetSession(ctx context.Context, sessionID string) error

ResetSession 重置会话

func (*DefaultSessionManager) SetTTL

func (sm *DefaultSessionManager) SetTTL(ttl time.Duration)

SetTTL 设置会话过期时间

type DetectRequest

type DetectRequest struct {
	AudioData   []byte    `json:"audio_data,omitempty"` // 音频数据
	AudioFormat string    `json:"audio_format"`         // "pcm" 或 "opus"
	SampleRate  int       `json:"sample_rate"`          // 采样率
	Channels    int       `json:"channels"`             // 声道数
	Threshold   float64   `json:"threshold,omitempty"`  // VAD 阈值(可选)
	SessionID   string    `json:"session_id,omitempty"` // 会话 ID
	Timestamp   time.Time `json:"timestamp,omitempty"`  // 时间戳
}

DetectRequest VAD 检测请求

type DetectResponse

type DetectResponse struct {
	HaveVoice  bool      `json:"have_voice"`            // 是否有语音
	VoiceStop  bool      `json:"voice_stop"`            // 语音是否停止
	SpeechProb float64   `json:"speech_prob,omitempty"` // 语音概率
	Timestamp  time.Time `json:"timestamp,omitempty"`   // 响应时间戳
}

DetectResponse VAD 检测响应

type Detector

type Detector interface {
	// Detect 检测音频中的语音活动
	Detect(ctx context.Context, req *DetectRequest) (*DetectResponse, error)

	// HealthCheck 健康检查
	HealthCheck(ctx context.Context) error

	// Close 关闭检测器
	Close() error

	// Provider 返回提供商名称
	Provider() Provider
}

Detector VAD 检测器接口

type DetectorOptions

type DetectorOptions struct {
	Timeout      time.Duration
	MaxRetries   int
	RetryBackoff time.Duration
	Logger       interface{} // *zap.Logger
}

DetectorOptions 检测器选项

type Factory

type Factory interface {
	// CreateDetector 创建检测器
	CreateDetector(config *Config) (Detector, error)

	// CreateSessionManager 创建会话管理器
	CreateSessionManager(detector Detector, config *Config) (SessionManager, error)
}

Factory VAD 工厂接口

type HTTPDetector

type HTTPDetector struct {
	// contains filtered or unexported fields
}

HTTPDetector HTTP VAD 检测器

func NewHTTPDetector

func NewHTTPDetector(config *Config, logger *zap.Logger) (*HTTPDetector, error)

NewHTTPDetector 创建新的 HTTP VAD 检测器

func (*HTTPDetector) Close

func (d *HTTPDetector) Close() error

Close 关闭检测器

func (*HTTPDetector) Detect

func (d *HTTPDetector) Detect(ctx context.Context, req *DetectRequest) (*DetectResponse, error)

Detect 检测音频中的语音活动

func (*HTTPDetector) HealthCheck

func (d *HTTPDetector) HealthCheck(ctx context.Context) error

HealthCheck 健康检查

func (*HTTPDetector) Provider

func (d *HTTPDetector) Provider() Provider

Provider 返回提供商名称

func (*HTTPDetector) ResetSession

func (d *HTTPDetector) ResetSession(ctx context.Context, sessionID string) error

ResetSession 重置会话

func (*HTTPDetector) SetTimeout

func (d *HTTPDetector) SetTimeout(timeout time.Duration)

SetTimeout 设置 HTTP 超时时间

type HealthResponse

type HealthResponse struct {
	Status  string `json:"status"`
	Service string `json:"service"`
	Message string `json:"message,omitempty"`
}

HealthResponse 健康检查响应

type Provider

type Provider string

Provider VAD 提供商类型

const (
	ProviderHTTP      Provider = "http"      // HTTP 服务提供商
	ProviderWebSocket Provider = "websocket" // WebSocket 提供商
)

type RMSDetector

type RMSDetector struct {
	// contains filtered or unexported fields
}

RMSDetector performs energy-based (RMS) gating suitable for barge-in while downlink synthesis plays.

func NewDetector

func NewDetector() *RMSDetector

NewDetector is an alias for NewRMSDetector for backward compatibility.

func NewRMSDetector

func NewRMSDetector() *RMSDetector

NewRMSDetector builds a detector with sipold-aligned defaults.

func (*RMSDetector) CheckBargeIn

func (v *RMSDetector) CheckBargeIn(pcmData []byte, synthPlaying bool) bool

CheckBargeIn returns true when uplink PCM suggests the user is speaking during synthesis playback. pcmData must be 16-bit little-endian mono PCM (typically 20 ms @ 16 kHz from the sip1 decode path).

func (*RMSDetector) SetConsecutiveFrames

func (v *RMSDetector) SetConsecutiveFrames(frames int)

SetConsecutiveFrames sets how many consecutive over-threshold frames trigger barge-in.

func (*RMSDetector) SetEnabled

func (v *RMSDetector) SetEnabled(enabled bool)

SetEnabled turns detection on/off.

func (*RMSDetector) SetLogger

func (v *RMSDetector) SetLogger(logger *logrus.Logger)

SetLogger attaches an optional logrus logger (debug/info).

func (*RMSDetector) SetThreshold

func (v *RMSDetector) SetThreshold(threshold float64)

SetThreshold sets the RMS ceiling used with adaptive noise tracking.

type Session

type Session struct {
	ID             string
	CreatedAt      time.Time
	LastActivityAt time.Time
	HaveVoice      bool
	VoiceStop      bool
	LastSpeechProb float64
	Metadata       map[string]interface{} // 自定义元数据
}

Session VAD 会话

type SessionManager

type SessionManager interface {
	// GetOrCreateSession 获取或创建会话
	GetOrCreateSession(sessionID string) *Session

	// ProcessAudio 处理音频数据
	ProcessAudio(ctx context.Context, sessionID string, audioData []byte, format string, threshold ...float64) (*DetectResponse, error)

	// GetSession 获取会话
	GetSession(sessionID string) *Session

	// ResetSession 重置会话
	ResetSession(ctx context.Context, sessionID string) error

	// DeleteSession 删除会话
	DeleteSession(ctx context.Context, sessionID string) error

	// ListSessions 列出所有活跃会话
	ListSessions() []string

	// Close 关闭会话管理器
	Close() error
}

SessionManager VAD 会话管理器接口

type SessionManagerOptions

type SessionManagerOptions struct {
	SessionTTL      time.Duration
	CleanupInterval time.Duration
	MaxSessions     int
	Logger          interface{} // *zap.Logger
}

SessionManagerOptions 会话管理器选项

type WebSocketDetector

type WebSocketDetector struct {
	// contains filtered or unexported fields
}

WebSocketDetector WebSocket VAD 检测器

func NewWebSocketDetector

func NewWebSocketDetector(config *Config, logger *zap.Logger) (*WebSocketDetector, error)

NewWebSocketDetector 创建新的 WebSocket VAD 检测器

func (*WebSocketDetector) Close

func (d *WebSocketDetector) Close() error

Close 关闭检测器

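func (*WebSocketDetector) Detect

func (d *WebSocketDetector) Detect(ctx context.Context, req *DetectRequest) (*DetectResponse, error)

Detect 检测音频中的语音活动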

func (*WebSocketDetector) HealthCheck

func (d *WebSocketDetector) HealthCheck(ctx context.Context) error

HealthCheck 健康检查

func (*WebSocketDetector) IsConnected

func (d *WebSocketDetector) IsConnected() bool

IsConnected 检查是否已连接

func (*WebSocketDetector) Provider

func (d *WebSocketDetector) Provider() Provider

Provider 返回提供商名称

func (*WebSocketDetector) ResetSession

func (d *WebSocketDetector) ResetSession(ctx context.Context, sessionID string) error

ResetSession 重置会话

type WebSocketMessage

type WebSocketMessage struct {
	Type      string          `json:"type"`                 // "audio", "reset"
	Data      string          `json:"data,omitempty"`       // Base64 编码的音频数据
	Format    string          `json:"format,omitempty"`     // "pcm" 或 "opus"
	SessionID string          `json:"session_id,omitempty"` // 会话 ID
	Result    *DetectResponse `json:"result,omitempty"`     // 检测结果
	Error     string          `json:"error,omitempty"`      // 错误信息
	Timestamp time.Time       `json:"timestamp,omitempty"`  // 时间戳
}

WebSocketMessage WebSocket 消息

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL