Documentation
¶
Overview ¶
Package embedding provides embedding vector functionality for different model providers.
Index ¶
- Constants
- Variables
- func CreateProviders(config *ProviderConfig) (map[string]Provider, error)
- func GetEmbeddingModelDimensions(modelType ModelType, modelName string) (int, error)
- func ValidateEmbeddingModel(modelType ModelType, modelName string) error
- type AdvancedSearchService
- type AgentService
- type AnthropicBatchEmbeddingRequest
- type AnthropicBatchEmbeddingResponse
- type AnthropicConfig
- type AnthropicEmbeddingRequest
- type AnthropicEmbeddingResponse
- type AnthropicEmbeddingService
- func (s *AnthropicEmbeddingService) BatchGenerateEmbeddings(ctx context.Context, texts []string, contentType string, contentIDs []string) ([]*EmbeddingVector, error)
- func (s *AnthropicEmbeddingService) GenerateEmbedding(ctx context.Context, text string, contentType string, contentID string) (*EmbeddingVector, error)
- func (s *AnthropicEmbeddingService) GetModelConfig() ModelConfig
- func (s *AnthropicEmbeddingService) GetModelDimensions() int
- type BedrockConfig
- type BedrockEmbeddingService
- func (s *BedrockEmbeddingService) BatchGenerateEmbeddings(ctx context.Context, texts []string, contentType string, contentIDs []string) ([]*EmbeddingVector, error)
- func (s *BedrockEmbeddingService) GenerateEmbedding(ctx context.Context, text string, contentType string, contentID string) (*EmbeddingVector, error)
- func (s *BedrockEmbeddingService) GetModelConfig() ModelConfig
- func (s *BedrockEmbeddingService) GetModelDimensions() int
- type BedrockProvider
- type BedrockRuntimeClient
- type CachedEmbedding
- type ChunkingInterface
- type CircuitBreaker
- type CircuitBreakerConfig
- type CircuitBreakerState
- type CircuitBreakerStatus
- type CostOptimizer
- type CostOptimizerConfig
- type CostSummary
- type CrossModelSearchRequest
- type CrossModelSearchResult
- type DefaultEmbeddingPipeline
- func (p *DefaultEmbeddingPipeline) BatchProcessContent(ctx context.Context, contents []string, contentType string, ...) error
- func (p *DefaultEmbeddingPipeline) ProcessCodeChunks(ctx context.Context, contentType string, contentID string, chunkIDs []string) error
- func (p *DefaultEmbeddingPipeline) ProcessContent(ctx context.Context, content string, contentType string, contentID string) error
- func (p *DefaultEmbeddingPipeline) ProcessDiscussions(ctx context.Context, ownerRepo string, discussionIDs []string) error
- func (p *DefaultEmbeddingPipeline) ProcessIssues(ctx context.Context, ownerRepo string, issueNumbers []int) error
- type DimensionAdapter
- func (da *DimensionAdapter) GetProjectionQuality(fromDim, toDim int, provider, model string) float64
- func (da *DimensionAdapter) Normalize(embedding []float32, fromDim, toDim int) []float32
- func (da *DimensionAdapter) NormalizeWithProvider(embedding []float32, fromDim, toDim int, provider, model string) []float32
- func (da *DimensionAdapter) TrainProjectionMatrix(fromDim, toDim int, provider, model string, trainingData [][]float32) error
- type Embedding
- type EmbeddingCache
- type EmbeddingFactory
- func (f *EmbeddingFactory) CreateEmbeddingPipeline(chunkingService *chunking.ChunkingService, ...) (*DefaultEmbeddingPipeline, error)
- func (f *EmbeddingFactory) CreateEmbeddingService() (EmbeddingService, error)
- func (f *EmbeddingFactory) CreateEmbeddingStorage() (EmbeddingStorage, error)
- func (f *EmbeddingFactory) Initialize(ctx context.Context, chunkingService *chunking.ChunkingService, ...) (*DefaultEmbeddingPipeline, error)
- type EmbeddingFactoryConfig
- type EmbeddingMetric
- type EmbeddingPipelineConfig
- type EmbeddingProviderSelector
- type EmbeddingSearchResult
- type EmbeddingService
- type EmbeddingStorage
- type EmbeddingVector
- type GenerateEmbeddingRequest
- type GenerateEmbeddingResponse
- type GitHubComment
- type GitHubCommentData
- type GitHubContentAdapter
- func (a *GitHubContentAdapter) GetContent(ctx context.Context, owner string, repo string, path string) ([]byte, error)
- func (a *GitHubContentAdapter) GetIssue(ctx context.Context, owner string, repo string, issueNumber int) (*GitHubIssueData, error)
- func (a *GitHubContentAdapter) GetIssueComments(ctx context.Context, owner string, repo string, issueNumber int) ([]*GitHubCommentData, error)
- type GitHubContentProvider
- type GitHubIssue
- type GitHubIssueData
- type GoogleProvider
- type HybridSearchRequest
- type HybridSearchResult
- type InsertRequest
- type LoadBalancer
- type LoadBalancerConfig
- type MetricsFilter
- type MetricsRepository
- type MockBedrockClient
- type MockGitHubContentProvider
- func (m *MockGitHubContentProvider) GetContent(ctx context.Context, owner string, repo string, path string) ([]byte, error)
- func (m *MockGitHubContentProvider) GetIssue(ctx context.Context, owner string, repo string, issueNumber int) (*GitHubIssueData, error)
- func (m *MockGitHubContentProvider) GetIssueComments(ctx context.Context, owner string, repo string, issueNumber int) ([]*GitHubCommentData, error)
- type Model
- type ModelConfig
- type ModelFilter
- type ModelInfo
- type ModelType
- type OpenAIEmbeddingData
- type OpenAIEmbeddingRequest
- type OpenAIEmbeddingResponse
- type OpenAIEmbeddingService
- func (s *OpenAIEmbeddingService) BatchGenerateEmbeddings(ctx context.Context, texts []string, contentType string, contentIDs []string) ([]*EmbeddingVector, error)
- func (s *OpenAIEmbeddingService) GenerateEmbedding(ctx context.Context, text string, contentType string, contentID string) (*EmbeddingVector, error)
- func (s *OpenAIEmbeddingService) GetModelConfig() ModelConfig
- func (s *OpenAIEmbeddingService) GetModelDimensions() int
- type OpenAIProvider
- type OpenAIUsage
- type PgVectorStorage
- func (s *PgVectorStorage) BatchStoreEmbeddings(ctx context.Context, embeddings []*EmbeddingVector) error
- func (s *PgVectorStorage) DeleteEmbeddingsByContentIDs(ctx context.Context, contentIDs []string) error
- func (s *PgVectorStorage) FindSimilarEmbeddings(ctx context.Context, embedding *EmbeddingVector, limit int, threshold float32) ([]*EmbeddingVector, error)
- func (s *PgVectorStorage) GetEmbeddingsByContentIDs(ctx context.Context, contentIDs []string) ([]*EmbeddingVector, error)
- func (s *PgVectorStorage) StoreEmbedding(ctx context.Context, embedding *EmbeddingVector) error
- type ProjectionMatrix
- type Provider
- type ProviderCandidate
- type ProviderCapability
- type ProviderConfig
- type ProviderHealth
- type ProviderLoad
- type QualityConfig
- type QualityScore
- type QualityTracker
- type RelationshipContextEnricher
- func (e *RelationshipContextEnricher) EnrichEmbeddingMetadata(ctx context.Context, contentType string, contentID string, owner string, ...) (map[string]interface{}, error)
- func (e *RelationshipContextEnricher) EnrichEmbeddingText(ctx context.Context, contentType string, contentID string, owner string, ...) (string, error)
- func (e *RelationshipContextEnricher) WithContextDepth(depth int) *RelationshipContextEnricher
- func (e *RelationshipContextEnricher) WithDirection(direction string) *RelationshipContextEnricher
- func (e *RelationshipContextEnricher) WithMaxRelationships(max int) *RelationshipContextEnricher
- type Repository
- func (r *Repository) GetAvailableModels(ctx context.Context, filter ModelFilter) ([]Model, error)
- func (r *Repository) GetEmbeddingsByContext(ctx context.Context, contextID, tenantID uuid.UUID) ([]Embedding, error)
- func (r *Repository) GetModelByName(ctx context.Context, modelName string) (*Model, error)
- func (r *Repository) InsertEmbedding(ctx context.Context, req InsertRequest) (uuid.UUID, error)
- func (r *Repository) SearchEmbeddings(ctx context.Context, req SearchRequest) ([]EmbeddingSearchResult, error)
- type RouterConfig
- type RoutingDecision
- type RoutingRequest
- type SearchFilter
- type SearchOptions
- type SearchRequest
- type SearchResult
- type SearchResults
- type SearchService
- type SearchSort
- type ServiceV2
- func (s *ServiceV2) BatchGenerateEmbeddings(ctx context.Context, reqs []GenerateEmbeddingRequest) ([]*GenerateEmbeddingResponse, error)
- func (s *ServiceV2) GenerateBatch(ctx context.Context, texts []string, model string) ([][]float32, error)
- func (s *ServiceV2) GenerateEmbedding(ctx context.Context, req GenerateEmbeddingRequest) (*GenerateEmbeddingResponse, error)
- func (s *ServiceV2) GetProviderHealth(ctx context.Context) map[string]ProviderHealth
- func (s *ServiceV2) SetProgressCallback(fn func(float64))
- type ServiceV2Config
- type SmartRouter
- type UnifiedSearchConfig
- type UnifiedSearchService
- func (s *UnifiedSearchService) CrossModelSearch(ctx context.Context, req CrossModelSearchRequest) ([]CrossModelSearchResult, error)
- func (s *UnifiedSearchService) HybridSearch(ctx context.Context, req HybridSearchRequest) ([]HybridSearchResult, error)
- func (s *UnifiedSearchService) Search(ctx context.Context, text string, options *SearchOptions) (*SearchResults, error)
- func (s *UnifiedSearchService) SearchByContentID(ctx context.Context, contentID string, options *SearchOptions) (*SearchResults, error)
- func (s *UnifiedSearchService) SearchByVector(ctx context.Context, vector []float32, options *SearchOptions) (*SearchResults, error)
- type VoyageProvider
Constants ¶
const (
	// Content types
	ContentTypeCodeChunk  = "code_chunk"
	ContentTypeIssue      = "issue"
	ContentTypeComment    = "comment"
	ContentTypeDiscussion = "discussion"

	// Metadata keys
	MetadataKeyRepositoryOwner = "repository_owner"
	MetadataKeyRepositoryName  = "repository_name"
	MetadataKeyLanguage        = "language"
	MetadataKeyChunkType       = "chunk_type"
	MetadataKeySourceFile      = "source_file"
	MetadataKeyCreatedAt       = "created_at"
	MetadataKeyContentType     = "content_type"
)
const (
	ProviderOpenAI = "openai"
	ProviderVoyage = "voyage" // Anthropic's partner
	ProviderAmazon = "amazon"
	ProviderCohere = "cohere" // Available on Bedrock
	ProviderGoogle = "google"
)
Provider constants
const (
	ModelTypeText       = "text"
	ModelTypeCode       = "code"
	ModelTypeMultimodal = "multimodal"
)
Model type constants
const StandardDimension = 1536 // OpenAI standard for cross-model compatibility
Variables ¶
var ProviderCapabilities = map[string]ProviderCapability{
	"openai": {
		SupportsEmbeddings: true,
		DefaultModel:       "text-embedding-3-small",
		EmbeddingModels: []ModelInfo{
			{ModelID: "text-embedding-3-small", Dimensions: 1536, MaxTokens: 8191, CostPer1M: 0.02},
			{ModelID: "text-embedding-3-large", Dimensions: 3072, MaxTokens: 8191, CostPer1M: 0.13},
			{ModelID: "text-embedding-ada-002", Dimensions: 1536, MaxTokens: 8191, CostPer1M: 0.10},
		},
	},
	"bedrock": {
		SupportsEmbeddings: true,
		DefaultModel:       "amazon.titan-embed-text-v2:0",
		EmbeddingModels: []ModelInfo{
			{ModelID: "amazon.titan-embed-text-v1", Dimensions: 1536, MaxTokens: 8192, CostPer1M: 0.02},
			{ModelID: "amazon.titan-embed-text-v2:0", Dimensions: 1024, MaxTokens: 8192, CostPer1M: 0.02},
			{ModelID: "cohere.embed-english-v3", Dimensions: 1024, MaxTokens: 0, CostPer1M: 0.10},
			{ModelID: "cohere.embed-multilingual-v3", Dimensions: 1024, MaxTokens: 0, CostPer1M: 0.10},
		},
	},
	"anthropic": {
		SupportsEmbeddings: false,
		EmbeddingModels:    []ModelInfo{},
	},
	"voyage": {
		SupportsEmbeddings: true,
		DefaultModel:       "voyage-2",
		EmbeddingModels: []ModelInfo{
			{ModelID: "voyage-2", Dimensions: 1024, MaxTokens: 0, CostPer1M: 0.10},
			{ModelID: "voyage-large-2", Dimensions: 1024, MaxTokens: 0, CostPer1M: 0.12},
			{ModelID: "voyage-code-2", Dimensions: 1024, MaxTokens: 0, CostPer1M: 0.10},
		},
	},
}
ProviderCapabilities defines what each provider supports
Functions ¶
func CreateProviders ¶
func CreateProviders(config *ProviderConfig) (map[string]Provider, error)
CreateProviders creates all configured providers
func GetEmbeddingModelDimensions ¶
func GetEmbeddingModelDimensions(modelType ModelType, modelName string) (int, error)
GetEmbeddingModelDimensions returns the dimensions for a given model
func ValidateEmbeddingModel ¶
func ValidateEmbeddingModel(modelType ModelType, modelName string) error
ValidateEmbeddingModel validates an embedding model name
Types ¶
type AdvancedSearchService ¶
type AdvancedSearchService interface {
SearchService
// CrossModelSearch performs search across embeddings from different models
CrossModelSearch(ctx context.Context, req CrossModelSearchRequest) ([]CrossModelSearchResult, error)
// HybridSearch performs hybrid search combining semantic and keyword search
HybridSearch(ctx context.Context, req HybridSearchRequest) ([]HybridSearchResult, error)
}
AdvancedSearchService extends SearchService with cross-model and hybrid search capabilities
type AgentService ¶
type AgentService interface {
GetConfig(ctx context.Context, agentID string) (*agents.AgentConfig, error)
GetModelsForAgent(ctx context.Context, agentID string, taskType agents.TaskType) (primary []string, fallback []string, err error)
CreateConfig(ctx context.Context, config *agents.AgentConfig) error
UpdateConfig(ctx context.Context, agentID string, update *agents.ConfigUpdateRequest) (*agents.AgentConfig, error)
}
AgentService defines the interface for agent configuration management
type AnthropicBatchEmbeddingRequest ¶
type AnthropicBatchEmbeddingRequest struct {
Model string `json:"model"`
Texts []string `json:"texts"`
}
AnthropicBatchEmbeddingRequest represents a request to the Anthropic embeddings API for batch processing
type AnthropicBatchEmbeddingResponse ¶
type AnthropicBatchEmbeddingResponse struct {
Object string `json:"object"`
Embeddings [][]float32 `json:"embeddings"`
Model string `json:"model"`
Error interface{} `json:"error,omitempty"`
}
AnthropicBatchEmbeddingResponse represents a response from the Anthropic embeddings API for batch processing
type AnthropicConfig ¶
type AnthropicConfig struct {
// Anthropic API key
APIKey string
// Anthropic API endpoint (optional)
Endpoint string
// Anthropic model name
Model string
// For testing environments
UseMockEmbeddings bool
}
AnthropicConfig contains configuration for the Anthropic API
type AnthropicEmbeddingRequest ¶
AnthropicEmbeddingRequest represents a request to the Anthropic embeddings API
type AnthropicEmbeddingResponse ¶
type AnthropicEmbeddingResponse struct {
Object string `json:"object"`
Embedding []float32 `json:"embedding"`
Model string `json:"model"`
Error interface{} `json:"error,omitempty"`
}
AnthropicEmbeddingResponse represents a response from the Anthropic embeddings API
type AnthropicEmbeddingService ¶
type AnthropicEmbeddingService struct {
// contains filtered or unexported fields
}
AnthropicEmbeddingService implements EmbeddingService using Anthropic
func NewAnthropicEmbeddingService ¶
func NewAnthropicEmbeddingService(config *AnthropicConfig) (*AnthropicEmbeddingService, error)
NewAnthropicEmbeddingService creates a new Anthropic embedding service
func NewMockAnthropicEmbeddingService ¶
func NewMockAnthropicEmbeddingService(modelName string) (*AnthropicEmbeddingService, error)
NewMockAnthropicEmbeddingService creates a mock Anthropic embedding service for testing
func (*AnthropicEmbeddingService) BatchGenerateEmbeddings ¶
func (s *AnthropicEmbeddingService) BatchGenerateEmbeddings(ctx context.Context, texts []string, contentType string, contentIDs []string) ([]*EmbeddingVector, error)
BatchGenerateEmbeddings creates embeddings for multiple texts
func (*AnthropicEmbeddingService) GenerateEmbedding ¶
func (s *AnthropicEmbeddingService) GenerateEmbedding(ctx context.Context, text string, contentType string, contentID string) (*EmbeddingVector, error)
GenerateEmbedding creates an embedding for a single text
func (*AnthropicEmbeddingService) GetModelConfig ¶
func (s *AnthropicEmbeddingService) GetModelConfig() ModelConfig
GetModelConfig returns the model configuration
func (*AnthropicEmbeddingService) GetModelDimensions ¶
func (s *AnthropicEmbeddingService) GetModelDimensions() int
GetModelDimensions returns the dimensions of the embeddings generated by this model
type BedrockConfig ¶
type BedrockConfig struct {
// AWS Region
Region string
// AWS credentials
AccessKeyID string
SecretAccessKey string
SessionToken string
// Model ID
ModelID string
// For testing environments when AWS credentials aren't available
UseMockEmbeddings bool
}
BedrockConfig contains configuration for AWS Bedrock
type BedrockEmbeddingService ¶
type BedrockEmbeddingService struct {
// contains filtered or unexported fields
}
BedrockEmbeddingService implements EmbeddingService using AWS Bedrock
func NewBedrockEmbeddingService ¶
func NewBedrockEmbeddingService(config *BedrockConfig) (*BedrockEmbeddingService, error)
NewBedrockEmbeddingService creates a new AWS Bedrock embedding service
func NewMockBedrockEmbeddingService ¶
func NewMockBedrockEmbeddingService(modelID string) (*BedrockEmbeddingService, error)
NewMockBedrockEmbeddingService creates a mock Bedrock embedding service for testing. This allows testing without requiring actual AWS credentials.
func (*BedrockEmbeddingService) BatchGenerateEmbeddings ¶
func (s *BedrockEmbeddingService) BatchGenerateEmbeddings(ctx context.Context, texts []string, contentType string, contentIDs []string) ([]*EmbeddingVector, error)
BatchGenerateEmbeddings creates embeddings for multiple texts
func (*BedrockEmbeddingService) GenerateEmbedding ¶
func (s *BedrockEmbeddingService) GenerateEmbedding(ctx context.Context, text string, contentType string, contentID string) (*EmbeddingVector, error)
GenerateEmbedding creates an embedding for a single text
func (*BedrockEmbeddingService) GetModelConfig ¶
func (s *BedrockEmbeddingService) GetModelConfig() ModelConfig
GetModelConfig returns the model configuration
func (*BedrockEmbeddingService) GetModelDimensions ¶
func (s *BedrockEmbeddingService) GetModelDimensions() int
GetModelDimensions returns the dimensions of the embeddings generated by this model
type BedrockProvider ¶
type BedrockProvider struct {
// contains filtered or unexported fields
}
BedrockProvider implements the Provider interface for Amazon Bedrock embeddings
func NewBedrockProvider ¶
func NewBedrockProvider(region string) (*BedrockProvider, error)
NewBedrockProvider creates a new Bedrock embedding provider
func (*BedrockProvider) GenerateEmbedding ¶
func (p *BedrockProvider) GenerateEmbedding(ctx context.Context, content string, model string) ([]float32, error)
GenerateEmbedding generates an embedding using Amazon Bedrock
func (*BedrockProvider) GetSupportedModels ¶
func (p *BedrockProvider) GetSupportedModels() []string
GetSupportedModels returns the list of supported Bedrock models
func (*BedrockProvider) ValidateAPIKey ¶
func (p *BedrockProvider) ValidateAPIKey() error
ValidateAPIKey validates AWS credentials
type BedrockRuntimeClient ¶
type BedrockRuntimeClient interface {
InvokeModel(ctx context.Context, params *bedrockruntime.InvokeModelInput, optFns ...func(*bedrockruntime.Options)) (*bedrockruntime.InvokeModelOutput, error)
}
BedrockRuntimeClient defines an interface to allow for mocking in tests
type CachedEmbedding ¶
type CachedEmbedding struct {
Embedding []float32 `json:"embedding"`
Model string `json:"model"`
Provider string `json:"provider"`
Dimensions int `json:"dimensions"`
Metadata map[string]interface{} `json:"metadata"`
CachedAt time.Time `json:"cached_at"`
}
CachedEmbedding represents a cached embedding
type ChunkingInterface ¶
type ChunkingInterface interface {
// Here we define the minimum methods needed from the chunking service
// These methods should match what we actually use in the pipeline
ChunkCode(ctx context.Context, content string, path string) ([]*chunking.CodeChunk, error)
}
ChunkingInterface defines the interface for chunking services
type CircuitBreaker ¶
type CircuitBreaker struct {
// contains filtered or unexported fields
}
CircuitBreaker implements the circuit breaker pattern
func NewCircuitBreaker ¶
func NewCircuitBreaker(config CircuitBreakerConfig) *CircuitBreaker
NewCircuitBreaker creates a new circuit breaker
func (*CircuitBreaker) CanRequest ¶
func (cb *CircuitBreaker) CanRequest() bool
CanRequest checks if a request can be made
func (*CircuitBreaker) HealthScore ¶
func (cb *CircuitBreaker) HealthScore() float64
HealthScore returns a health score between 0 and 1
func (*CircuitBreaker) RecordFailure ¶
func (cb *CircuitBreaker) RecordFailure()
RecordFailure records a failed request
func (*CircuitBreaker) RecordSuccess ¶
func (cb *CircuitBreaker) RecordSuccess()
RecordSuccess records a successful request
func (*CircuitBreaker) Status ¶
func (cb *CircuitBreaker) Status() *CircuitBreakerStatus
Status returns the current status
type CircuitBreakerConfig ¶
type CircuitBreakerConfig struct {
FailureThreshold int
SuccessThreshold int
Timeout time.Duration
HalfOpenMaxRequests int
}
CircuitBreakerConfig configures a circuit breaker
type CircuitBreakerState ¶
type CircuitBreakerState string
CircuitBreakerState represents the state of a circuit breaker
const (
	StateClosed   CircuitBreakerState = "closed"
	StateOpen     CircuitBreakerState = "open"
	StateHalfOpen CircuitBreakerState = "half_open"
)
type CircuitBreakerStatus ¶
type CircuitBreakerStatus struct {
State string `json:"state"`
FailureCount int `json:"failure_count"`
SuccessCount int `json:"success_count"`
LastFailureTime time.Time `json:"last_failure_time,omitempty"`
LastStateChangeTime time.Time `json:"last_state_change_time"`
}
CircuitBreakerStatus represents the current status
type CostOptimizer ¶
type CostOptimizer struct {
// contains filtered or unexported fields
}
CostOptimizer tracks and optimizes costs
func NewCostOptimizer ¶
func NewCostOptimizer(config CostOptimizerConfig) *CostOptimizer
type CostOptimizerConfig ¶
type CostOptimizerConfig struct {
MaxCostPerRequest float64
}
type CostSummary ¶
type CostSummary struct {
AgentID string `json:"agent_id"`
Period string `json:"period"`
TotalCostUSD float64 `json:"total_cost_usd"`
ByProvider map[string]float64 `json:"by_provider"`
ByModel map[string]float64 `json:"by_model"`
RequestCount int `json:"request_count"`
TokensUsed int `json:"tokens_used"`
}
type CrossModelSearchRequest ¶
type CrossModelSearchRequest struct {
// Query is the search query text
Query string `json:"query"`
// QueryEmbedding is the pre-computed query embedding (optional)
QueryEmbedding []float32 `json:"query_embedding,omitempty"`
// SearchModel is the model to use for generating query embeddings
SearchModel string `json:"search_model"`
// IncludeModels limits results to specific models (empty means all)
IncludeModels []string `json:"include_models,omitempty"`
// ExcludeModels excludes results from specific models
ExcludeModels []string `json:"exclude_models,omitempty"`
// TenantID is the tenant to search within
TenantID uuid.UUID `json:"tenant_id"`
// ContextID optionally limits search to a specific context
ContextID *uuid.UUID `json:"context_id,omitempty"`
// Limit is the maximum number of results to return
Limit int `json:"limit"`
// MinSimilarity is the minimum similarity threshold
MinSimilarity float32 `json:"min_similarity"`
// MetadataFilter is a JSONB filter for metadata
MetadataFilter map[string]interface{} `json:"metadata_filter,omitempty"`
// TaskType optionally specifies the type of task for scoring
TaskType string `json:"task_type,omitempty"`
// Options for additional search parameters
Options *SearchOptions `json:"options,omitempty"`
}
CrossModelSearchRequest defines parameters for cross-model search
type CrossModelSearchResult ¶
type CrossModelSearchResult struct {
// ID is the embedding ID
ID uuid.UUID `json:"id"`
// ContextID is the context this embedding belongs to
ContextID *uuid.UUID `json:"context_id,omitempty"`
// Content is the text content
Content string `json:"content"`
// OriginalModel is the model that created this embedding
OriginalModel string `json:"original_model"`
// OriginalDimension is the original embedding dimension
OriginalDimension int `json:"original_dimension"`
// Similarity is the normalized similarity score
Similarity float32 `json:"similarity"`
// RawSimilarity is the raw similarity score before normalization
RawSimilarity float32 `json:"raw_similarity"`
// AgentID is the agent that created this content
AgentID string `json:"agent_id,omitempty"`
// Metadata contains additional information
Metadata map[string]interface{} `json:"metadata,omitempty"`
// CreatedAt is when the embedding was created
CreatedAt time.Time `json:"created_at"`
// ModelQualityScore is the quality score for this model
ModelQualityScore float32 `json:"model_quality_score"`
// FinalScore is the final weighted score
FinalScore float32 `json:"final_score"`
}
CrossModelSearchResult represents a result from cross-model search
type DefaultEmbeddingPipeline ¶
type DefaultEmbeddingPipeline struct {
// contains filtered or unexported fields
}
DefaultEmbeddingPipeline implements EmbeddingPipeline for processing different content types
func NewEmbeddingPipeline ¶
func NewEmbeddingPipeline( embeddingService EmbeddingService, storage EmbeddingStorage, chunkingService *chunking.ChunkingService, contentProvider GitHubContentProvider, config *EmbeddingPipelineConfig, ) (*DefaultEmbeddingPipeline, error)
NewEmbeddingPipeline creates a new embedding pipeline
func (*DefaultEmbeddingPipeline) BatchProcessContent ¶
func (p *DefaultEmbeddingPipeline) BatchProcessContent(ctx context.Context, contents []string, contentType string, contentIDs []string) error
BatchProcessContent processes multiple content items in a batch
func (*DefaultEmbeddingPipeline) ProcessCodeChunks ¶
func (p *DefaultEmbeddingPipeline) ProcessCodeChunks(ctx context.Context, contentType string, contentID string, chunkIDs []string) error
ProcessCodeChunks processes code chunks to generate and store embeddings
func (*DefaultEmbeddingPipeline) ProcessContent ¶
func (p *DefaultEmbeddingPipeline) ProcessContent(ctx context.Context, content string, contentType string, contentID string) error
ProcessContent processes a single content item to generate and store embeddings
func (*DefaultEmbeddingPipeline) ProcessDiscussions ¶
func (p *DefaultEmbeddingPipeline) ProcessDiscussions(ctx context.Context, ownerRepo string, discussionIDs []string) error
ProcessDiscussions processes GitHub discussions to generate and store embeddings
func (*DefaultEmbeddingPipeline) ProcessIssues ¶
func (p *DefaultEmbeddingPipeline) ProcessIssues(ctx context.Context, ownerRepo string, issueNumbers []int) error
ProcessIssues processes GitHub issues to generate and store embeddings
type DimensionAdapter ¶
type DimensionAdapter struct {
// contains filtered or unexported fields
}
DimensionAdapter handles dimension normalization and projection
func NewDimensionAdapter ¶
func NewDimensionAdapter() *DimensionAdapter
NewDimensionAdapter creates a new dimension adapter
func NewDimensionAdapterWithDB ¶
func NewDimensionAdapterWithDB(db *sql.DB) *DimensionAdapter
NewDimensionAdapterWithDB creates a new dimension adapter with database support
func (*DimensionAdapter) GetProjectionQuality ¶
func (da *DimensionAdapter) GetProjectionQuality(fromDim, toDim int, provider, model string) float64
GetProjectionQuality returns the quality score for a projection
func (*DimensionAdapter) Normalize ¶
func (da *DimensionAdapter) Normalize(embedding []float32, fromDim, toDim int) []float32
Normalize normalizes an embedding to the target dimension
func (*DimensionAdapter) NormalizeWithProvider ¶
func (da *DimensionAdapter) NormalizeWithProvider(embedding []float32, fromDim, toDim int, provider, model string) []float32
NormalizeWithProvider normalizes using provider-specific projection if available
func (*DimensionAdapter) TrainProjectionMatrix ¶
func (da *DimensionAdapter) TrainProjectionMatrix(fromDim, toDim int, provider, model string, trainingData [][]float32) error
TrainProjectionMatrix trains a new projection matrix (would be async in production)
type Embedding ¶
type Embedding struct {
ID uuid.UUID `json:"id" db:"id"`
ContextID uuid.UUID `json:"context_id" db:"context_id"`
ContentIndex int `json:"content_index" db:"content_index"`
ChunkIndex int `json:"chunk_index" db:"chunk_index"`
Content string `json:"content" db:"content"`
ContentHash string `json:"content_hash" db:"content_hash"`
ContentTokens *int `json:"content_tokens,omitempty" db:"content_tokens"`
ModelID uuid.UUID `json:"model_id" db:"model_id"`
ModelProvider string `json:"model_provider" db:"model_provider"`
ModelName string `json:"model_name" db:"model_name"`
ModelDimensions int `json:"model_dimensions" db:"model_dimensions"`
ConfiguredDimensions *int `json:"configured_dimensions,omitempty" db:"configured_dimensions"`
ProcessingTimeMS *int `json:"processing_time_ms,omitempty" db:"processing_time_ms"`
EmbeddingCreatedAt time.Time `json:"embedding_created_at" db:"embedding_created_at"`
Magnitude float64 `json:"magnitude" db:"magnitude"`
TenantID uuid.UUID `json:"tenant_id" db:"tenant_id"`
Metadata json.RawMessage `json:"metadata" db:"metadata"`
CreatedAt time.Time `json:"created_at" db:"created_at"`
UpdatedAt time.Time `json:"updated_at" db:"updated_at"`
}
Embedding represents a stored embedding
type EmbeddingCache ¶
type EmbeddingCache interface {
Get(ctx context.Context, key string) (*CachedEmbedding, error)
Set(ctx context.Context, key string, embedding *CachedEmbedding, ttl time.Duration) error
Delete(ctx context.Context, key string) error
}
EmbeddingCache defines the interface for caching embeddings
type EmbeddingFactory ¶
type EmbeddingFactory struct {
// contains filtered or unexported fields
}
EmbeddingFactory creates and configures embedding components
func NewEmbeddingFactory ¶
func NewEmbeddingFactory(config *EmbeddingFactoryConfig) (*EmbeddingFactory, error)
NewEmbeddingFactory creates a new embedding factory with the specified configuration
func (*EmbeddingFactory) CreateEmbeddingPipeline ¶
func (f *EmbeddingFactory) CreateEmbeddingPipeline( chunkingService *chunking.ChunkingService, contentProvider GitHubContentProvider, ) (*DefaultEmbeddingPipeline, error)
CreateEmbeddingPipeline creates a complete embedding pipeline
func (*EmbeddingFactory) CreateEmbeddingService ¶
func (f *EmbeddingFactory) CreateEmbeddingService() (EmbeddingService, error)
CreateEmbeddingService creates an embedding service based on the factory configuration
func (*EmbeddingFactory) CreateEmbeddingStorage ¶
func (f *EmbeddingFactory) CreateEmbeddingStorage() (EmbeddingStorage, error)
CreateEmbeddingStorage creates an embedding storage based on the factory configuration
func (*EmbeddingFactory) Initialize ¶
func (f *EmbeddingFactory) Initialize(ctx context.Context, chunkingService *chunking.ChunkingService, contentProvider GitHubContentProvider) (*DefaultEmbeddingPipeline, error)
Initialize tests all components and returns a fully configured pipeline ready for use
type EmbeddingFactoryConfig ¶
type EmbeddingFactoryConfig struct {
// Model configuration
ModelType ModelType `json:"model_type"`
ModelName string `json:"model_name"`
ModelAPIKey string `json:"model_api_key,omitempty"`
ModelEndpoint string `json:"model_endpoint,omitempty"`
ModelDimensions int `json:"model_dimensions"`
// Additional model parameters (used for provider-specific configurations)
Parameters map[string]interface{} `json:"parameters,omitempty"`
// Storage configuration
DatabaseConnection *sql.DB `json:"-"`
DatabaseSchema string `json:"database_schema"`
// Pipeline configuration
Concurrency int `json:"concurrency"`
BatchSize int `json:"batch_size"`
IncludeComments bool `json:"include_comments"`
EnrichMetadata bool `json:"enrich_metadata"`
}
EmbeddingFactoryConfig contains configuration for the embedding factory
type EmbeddingMetric ¶
type EmbeddingMetric struct {
ID uuid.UUID `json:"id" db:"id"`
AgentID string `json:"agent_id" db:"agent_id"`
ModelProvider string `json:"model_provider" db:"model_provider"`
ModelName string `json:"model_name" db:"model_name"`
ModelDimensions int `json:"model_dimensions" db:"model_dimensions"`
RequestID uuid.UUID `json:"request_id" db:"request_id"`
TokenCount int `json:"token_count" db:"token_count"`
TotalLatencyMs int `json:"total_latency_ms" db:"total_latency_ms"`
ProviderLatencyMs int `json:"provider_latency_ms" db:"provider_latency_ms"`
NormalizationLatencyMs int `json:"normalization_latency_ms" db:"normalization_latency_ms"`
CostUSD float64 `json:"cost_usd" db:"cost_usd"`
Status string `json:"status" db:"status"`
ErrorMessage string `json:"error_message" db:"error_message"`
RetryCount int `json:"retry_count" db:"retry_count"`
FinalProvider string `json:"final_provider" db:"final_provider"`
TenantID uuid.UUID `json:"tenant_id" db:"tenant_id"`
Timestamp time.Time `json:"timestamp" db:"timestamp"`
}
EmbeddingMetric represents a single metric entry
type EmbeddingPipelineConfig ¶
type EmbeddingPipelineConfig struct {
// Number of goroutines to use for parallel processing
Concurrency int
// Batch size for processing
BatchSize int
// Whether to include code comments in embeddings
IncludeComments bool
// Whether to enrich embeddings with metadata
EnrichMetadata bool
}
EmbeddingPipelineConfig holds configuration for the embedding pipeline
func DefaultEmbeddingPipelineConfig ¶
func DefaultEmbeddingPipelineConfig() *EmbeddingPipelineConfig
DefaultEmbeddingPipelineConfig returns the default embedding pipeline configuration
type EmbeddingProviderSelector ¶
type EmbeddingProviderSelector struct {
// Explicit configuration overrides
PreferredProvider string
PreferredModel string
// Auto-detection settings
EnableAutoDetection bool
ValidationMode string // "strict" or "permissive"
// contains filtered or unexported fields
}
EmbeddingProviderSelector intelligently selects and validates embedding providers
func NewEmbeddingProviderSelector ¶
func NewEmbeddingProviderSelector() *EmbeddingProviderSelector
NewEmbeddingProviderSelector creates a new selector with auto-detection
func (*EmbeddingProviderSelector) GetProviderSummary ¶
func (s *EmbeddingProviderSelector) GetProviderSummary() string
GetProviderSummary returns a summary of available providers
func (*EmbeddingProviderSelector) SelectProvider ¶
func (s *EmbeddingProviderSelector) SelectProvider() (provider string, model string, dimensions int, err error)
SelectProvider returns the best available provider and model
type EmbeddingSearchResult ¶
type EmbeddingSearchResult struct {
ID uuid.UUID `json:"id" db:"id"`
ContextID uuid.UUID `json:"context_id" db:"context_id"`
Content string `json:"content" db:"content"`
Similarity float64 `json:"similarity" db:"similarity"`
Metadata json.RawMessage `json:"metadata" db:"metadata"`
ModelProvider string `json:"model_provider" db:"model_provider"`
}
EmbeddingSearchResult represents a search result
type EmbeddingService ¶
type EmbeddingService interface {
GenerateEmbedding(ctx context.Context, text string, contentType string, contentID string) (*EmbeddingVector, error)
BatchGenerateEmbeddings(ctx context.Context, texts []string, contentType string, contentIDs []string) ([]*EmbeddingVector, error)
GetModelConfig() ModelConfig
GetModelDimensions() int
}
EmbeddingService defines the interface for generating embeddings - TEMPORARY for legacy code cleanup
type EmbeddingStorage ¶
type EmbeddingStorage interface {
StoreEmbedding(ctx context.Context, embedding *EmbeddingVector) error
BatchStoreEmbeddings(ctx context.Context, embeddings []*EmbeddingVector) error
FindSimilarEmbeddings(ctx context.Context, embedding *EmbeddingVector, limit int, threshold float32) ([]*EmbeddingVector, error)
GetEmbeddingsByContentIDs(ctx context.Context, contentIDs []string) ([]*EmbeddingVector, error)
DeleteEmbeddingsByContentIDs(ctx context.Context, contentIDs []string) error
}
EmbeddingStorage defines the interface for storing and retrieving embeddings - TEMPORARY for legacy code cleanup
type EmbeddingVector ¶
type EmbeddingVector struct {
Vector []float32 `json:"vector"`
Dimensions int `json:"dimensions"`
ModelID string `json:"model_id"`
ContentType string `json:"content_type"`
ContentID string `json:"content_id"`
Metadata map[string]interface{} `json:"metadata,omitempty"`
}
EmbeddingVector represents a vector embedding with metadata - TEMPORARY for legacy code cleanup
type GenerateEmbeddingRequest ¶
type GenerateEmbeddingRequest struct {
AgentID string `json:"agent_id" validate:"required"`
Text string `json:"text" validate:"required,max=50000"`
TaskType agents.TaskType `json:"task_type"`
Metadata map[string]interface{} `json:"metadata"`
RequestID string `json:"request_id"`
TenantID uuid.UUID `json:"tenant_id"`
ContextID uuid.UUID `json:"context_id"`
}
GenerateEmbeddingRequest represents a request to generate an embedding
type GenerateEmbeddingResponse ¶
type GenerateEmbeddingResponse struct {
EmbeddingID uuid.UUID `json:"embedding_id"`
RequestID string `json:"request_id"`
ModelUsed string `json:"model_used"`
Provider string `json:"provider"`
Dimensions int `json:"dimensions"`
NormalizedDimensions int `json:"normalized_dimensions"`
CostUSD float64 `json:"cost_usd"`
TokensUsed int `json:"tokens_used"`
GenerationTimeMs int64 `json:"generation_time_ms"`
Cached bool `json:"cached"`
Metadata map[string]interface{} `json:"metadata"`
}
GenerateEmbeddingResponse represents the response from generating an embedding
type GitHubComment ¶
type GitHubComment struct {
ID int `json:"id"`
Body string `json:"body"`
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
User struct {
Login string `json:"login"`
} `json:"user"`
}
GitHubComment represents a GitHub comment
type GitHubCommentData ¶
type GitHubCommentData struct {
ID int `json:"id"`
Body string `json:"body"`
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
User struct {
Login string `json:"login"`
} `json:"user"`
}
GitHubCommentData represents a GitHub comment for the adapter
type GitHubContentAdapter ¶
type GitHubContentAdapter struct {
// contains filtered or unexported fields
}
GitHubContentAdapter adapts the GitHubContentManager to the GitHubContentProvider interface
func NewGitHubContentAdapter ¶
func NewGitHubContentAdapter(contentManager *core.GitHubContentManager) *GitHubContentAdapter
NewGitHubContentAdapter creates a new GitHub content adapter
func (*GitHubContentAdapter) GetContent ¶
func (a *GitHubContentAdapter) GetContent(ctx context.Context, owner string, repo string, path string) ([]byte, error)
GetContent retrieves file content from GitHub
func (*GitHubContentAdapter) GetIssue ¶
func (a *GitHubContentAdapter) GetIssue(ctx context.Context, owner string, repo string, issueNumber int) (*GitHubIssueData, error)
GetIssue retrieves issue details from GitHub
func (*GitHubContentAdapter) GetIssueComments ¶
func (a *GitHubContentAdapter) GetIssueComments(ctx context.Context, owner string, repo string, issueNumber int) ([]*GitHubCommentData, error)
GetIssueComments retrieves issue comments from GitHub
type GitHubContentProvider ¶
type GitHubContentProvider interface {
// GetContent retrieves file content from GitHub
GetContent(ctx context.Context, owner, repo, path string) ([]byte, error)
// GetIssue retrieves issue details from GitHub
GetIssue(ctx context.Context, owner, repo string, issueNumber int) (*GitHubIssueData, error)
// GetIssueComments retrieves issue comments from GitHub
GetIssueComments(ctx context.Context, owner, repo string, issueNumber int) ([]*GitHubCommentData, error)
}
GitHubContentProvider defines the interface for accessing GitHub content
type GitHubIssue ¶
type GitHubIssue struct {
Title string `json:"title"`
Body string `json:"body"`
State string `json:"state"`
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
}
GitHubIssue represents a GitHub issue
type GitHubIssueData ¶
type GitHubIssueData struct {
Title string `json:"title"`
Body string `json:"body"`
State string `json:"state"`
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
}
GitHubIssueData represents a GitHub issue for the adapter
type GoogleProvider ¶
type GoogleProvider struct {
// contains filtered or unexported fields
}
GoogleProvider implements the Provider interface for Google Vertex AI embeddings
func NewGoogleProvider ¶
func NewGoogleProvider(projectID, location, apiKey string) *GoogleProvider
NewGoogleProvider creates a new Google Vertex AI embedding provider
func (*GoogleProvider) GenerateEmbedding ¶
func (p *GoogleProvider) GenerateEmbedding(ctx context.Context, content string, model string) ([]float32, error)
GenerateEmbedding generates an embedding using Google Vertex AI
func (*GoogleProvider) GetSupportedModels ¶
func (p *GoogleProvider) GetSupportedModels() []string
GetSupportedModels returns the list of supported Google models
func (*GoogleProvider) ValidateAPIKey ¶
func (p *GoogleProvider) ValidateAPIKey() error
ValidateAPIKey validates the Google API key
type HybridSearchRequest ¶
type HybridSearchRequest struct {
// Query is the main search query for semantic search
Query string `json:"query"`
// Keywords are additional keywords for keyword-based search
Keywords []string `json:"keywords,omitempty"`
// HybridWeight determines the balance between semantic and keyword results (0.0 to 1.0)
HybridWeight float32 `json:"hybrid_weight"`
// TenantID is the tenant to search within
TenantID uuid.UUID `json:"tenant_id"`
// ModelName is the embedding model to use
ModelName string `json:"model_name"`
// Limit is the maximum number of results
Limit int `json:"limit"`
// MinSimilarity is the minimum similarity threshold
MinSimilarity float32 `json:"min_similarity"`
// MetadataFilter is a JSONB filter for metadata
MetadataFilter map[string]interface{} `json:"metadata_filter,omitempty"`
// Options for additional search parameters
Options *SearchOptions `json:"options,omitempty"`
// QueryEmbedding allows pre-computed embedding to be passed
QueryEmbedding []float32 `json:"query_embedding,omitempty"`
}
HybridSearchRequest defines parameters for hybrid search
type HybridSearchResult ¶
type HybridSearchResult struct {
// Embed the cross-model search result
CrossModelSearchResult
// Result is the combined search result
Result *SearchResult `json:"result"`
// SemanticScore is the semantic similarity score
SemanticScore float32 `json:"semantic_score"`
// KeywordScore is the keyword relevance score
KeywordScore float32 `json:"keyword_score"`
// HybridScore is the combined score
HybridScore float32 `json:"hybrid_score"`
}
HybridSearchResult represents a result from hybrid search
type InsertRequest ¶
type InsertRequest struct {
ContextID uuid.UUID `json:"context_id"`
Content string `json:"content"`
Embedding []float32 `json:"embedding"`
ModelName string `json:"model_name"`
TenantID uuid.UUID `json:"tenant_id"`
Metadata json.RawMessage `json:"metadata,omitempty"`
ContentIndex int `json:"content_index"`
ChunkIndex int `json:"chunk_index"`
ConfiguredDimensions *int `json:"configured_dimensions,omitempty"` // For models that support reduction
}
InsertRequest represents a request to insert an embedding
type LoadBalancer ¶
type LoadBalancer struct {
// contains filtered or unexported fields
}
LoadBalancer tracks provider load
func NewLoadBalancer ¶
func NewLoadBalancer(config LoadBalancerConfig) *LoadBalancer
func (*LoadBalancer) GetLoad ¶
func (lb *LoadBalancer) GetLoad(provider string) float64
func (*LoadBalancer) RecordLatency ¶
func (lb *LoadBalancer) RecordLatency(provider string, latency time.Duration)
type LoadBalancerConfig ¶
type LoadBalancerConfig struct {
Strategy string
}
type MetricsFilter ¶
type MetricsRepository ¶
type MetricsRepository interface {
RecordMetric(ctx context.Context, metric *EmbeddingMetric) error
GetMetrics(ctx context.Context, filter MetricsFilter) ([]*EmbeddingMetric, error)
GetAgentCosts(ctx context.Context, agentID string, period time.Duration) (*CostSummary, error)
}
MetricsRepository stores embedding metrics
type MockBedrockClient ¶
type MockBedrockClient struct{}
MockBedrockClient provides a mock implementation of the BedrockRuntimeClient interface for testing
func (*MockBedrockClient) InvokeModel ¶
func (m *MockBedrockClient) InvokeModel(ctx context.Context, params *bedrockruntime.InvokeModelInput, optFns ...func(*bedrockruntime.Options)) (*bedrockruntime.InvokeModelOutput, error)
InvokeModel provides a mock implementation that always returns an error. Since we're using the useMockEmbeddings flag, this function should never actually be called.
type MockGitHubContentProvider ¶
type MockGitHubContentProvider struct{}
MockGitHubContentProvider implements GitHubContentProvider for testing
func NewMockGitHubContentProvider ¶
func NewMockGitHubContentProvider() *MockGitHubContentProvider
NewMockGitHubContentProvider creates a new mock GitHub content provider
func (*MockGitHubContentProvider) GetContent ¶
func (m *MockGitHubContentProvider) GetContent(ctx context.Context, owner string, repo string, path string) ([]byte, error)
GetContent mocks retrieving file content from GitHub
func (*MockGitHubContentProvider) GetIssue ¶
func (m *MockGitHubContentProvider) GetIssue(ctx context.Context, owner string, repo string, issueNumber int) (*GitHubIssueData, error)
GetIssue mocks retrieving issue details from GitHub
func (*MockGitHubContentProvider) GetIssueComments ¶
func (m *MockGitHubContentProvider) GetIssueComments(ctx context.Context, owner string, repo string, issueNumber int) ([]*GitHubCommentData, error)
GetIssueComments mocks retrieving issue comments from GitHub
type Model ¶
type Model struct {
ID uuid.UUID `json:"id" db:"id"`
Provider string `json:"provider" db:"provider"`
ModelName string `json:"model_name" db:"model_name"`
ModelVersion *string `json:"model_version,omitempty" db:"model_version"`
Dimensions int `json:"dimensions" db:"dimensions"`
MaxTokens *int `json:"max_tokens,omitempty" db:"max_tokens"`
SupportsBinary bool `json:"supports_binary" db:"supports_binary"`
SupportsDimensionalityReduction bool `json:"supports_dimensionality_reduction" db:"supports_dimensionality_reduction"`
MinDimensions *int `json:"min_dimensions,omitempty" db:"min_dimensions"`
CostPerMillionTokens *float64 `json:"cost_per_million_tokens,omitempty" db:"cost_per_million_tokens"`
ModelID *string `json:"model_id,omitempty" db:"model_id"` // For Bedrock models
ModelType *string `json:"model_type,omitempty" db:"model_type"`
IsActive bool `json:"is_active" db:"is_active"`
Capabilities json.RawMessage `json:"capabilities" db:"capabilities"`
CreatedAt time.Time `json:"created_at" db:"created_at"`
}
Model represents an embedding model
type ModelConfig ¶
type ModelConfig struct {
Type ModelType `json:"type"`
Name string `json:"name"`
APIKey string `json:"api_key,omitempty"`
Endpoint string `json:"endpoint,omitempty"`
Dimensions int `json:"dimensions"`
Parameters map[string]interface{} `json:"parameters,omitempty"`
}
ModelConfig contains configuration for embedding models - TEMPORARY for legacy code cleanup
type ModelFilter ¶
type ModelFilter struct {
Provider *string `json:"provider,omitempty"`
ModelType *string `json:"model_type,omitempty"`
IsActive *bool `json:"is_active,omitempty"`
}
ModelFilter specifies the filter criteria used when querying available models
type ModelInfo ¶
type ModelInfo struct {
ModelID string
Dimensions int
MaxTokens int
CostPer1M float64
Notes string
}
ModelInfo contains model metadata
type ModelType ¶
type ModelType string
ModelType represents the type of embedding model - TEMPORARY for legacy code cleanup
type OpenAIEmbeddingData ¶
OpenAIEmbeddingData represents embedding data in an OpenAI API response
type OpenAIEmbeddingRequest ¶
OpenAIEmbeddingRequest represents a request to the OpenAI embeddings API
type OpenAIEmbeddingResponse ¶
type OpenAIEmbeddingResponse struct {
Data []OpenAIEmbeddingData `json:"data"`
Model string `json:"model"`
Usage OpenAIUsage `json:"usage"`
}
OpenAIEmbeddingResponse represents a response from the OpenAI embeddings API
type OpenAIEmbeddingService ¶
type OpenAIEmbeddingService struct {
// contains filtered or unexported fields
}
OpenAIEmbeddingService implements EmbeddingService using OpenAI's API
func NewOpenAIEmbeddingService ¶
func NewOpenAIEmbeddingService(apiKey string, modelName string, dimensions int) (*OpenAIEmbeddingService, error)
NewOpenAIEmbeddingService creates a new OpenAI embedding service
func (*OpenAIEmbeddingService) BatchGenerateEmbeddings ¶
func (s *OpenAIEmbeddingService) BatchGenerateEmbeddings(ctx context.Context, texts []string, contentType string, contentIDs []string) ([]*EmbeddingVector, error)
BatchGenerateEmbeddings creates embeddings for multiple texts
func (*OpenAIEmbeddingService) GenerateEmbedding ¶
func (s *OpenAIEmbeddingService) GenerateEmbedding(ctx context.Context, text string, contentType string, contentID string) (*EmbeddingVector, error)
GenerateEmbedding creates an embedding for a single text
func (*OpenAIEmbeddingService) GetModelConfig ¶
func (s *OpenAIEmbeddingService) GetModelConfig() ModelConfig
GetModelConfig returns the model configuration
func (*OpenAIEmbeddingService) GetModelDimensions ¶
func (s *OpenAIEmbeddingService) GetModelDimensions() int
GetModelDimensions returns the dimensions of the embeddings generated by this model
type OpenAIProvider ¶
type OpenAIProvider struct {
// contains filtered or unexported fields
}
OpenAIProvider implements the Provider interface for OpenAI embeddings
func NewOpenAIProvider ¶
func NewOpenAIProvider(apiKey string) *OpenAIProvider
NewOpenAIProvider creates a new OpenAI embedding provider
func (*OpenAIProvider) GenerateEmbedding ¶
func (p *OpenAIProvider) GenerateEmbedding(ctx context.Context, content string, model string) ([]float32, error)
GenerateEmbedding generates an embedding using OpenAI API
func (*OpenAIProvider) GetSupportedModels ¶
func (p *OpenAIProvider) GetSupportedModels() []string
GetSupportedModels returns the list of supported OpenAI models
func (*OpenAIProvider) ValidateAPIKey ¶
func (p *OpenAIProvider) ValidateAPIKey() error
ValidateAPIKey validates the OpenAI API key
type OpenAIUsage ¶
type OpenAIUsage struct {
PromptTokens int `json:"prompt_tokens"`
TotalTokens int `json:"total_tokens"`
}
OpenAIUsage represents usage information in an OpenAI API response
type PgVectorStorage ¶
type PgVectorStorage struct {
// contains filtered or unexported fields
}
PgVectorStorage implements EmbeddingStorage for PostgreSQL with pgvector
func NewPgVectorStorage ¶
func NewPgVectorStorage(db *sql.DB, schema string) (*PgVectorStorage, error)
NewPgVectorStorage creates a new PostgreSQL vector storage
func (*PgVectorStorage) BatchStoreEmbeddings ¶
func (s *PgVectorStorage) BatchStoreEmbeddings(ctx context.Context, embeddings []*EmbeddingVector) error
BatchStoreEmbeddings stores multiple embeddings in a batch
func (*PgVectorStorage) DeleteEmbeddingsByContentIDs ¶
func (s *PgVectorStorage) DeleteEmbeddingsByContentIDs(ctx context.Context, contentIDs []string) error
DeleteEmbeddingsByContentIDs deletes embeddings by content IDs
func (*PgVectorStorage) FindSimilarEmbeddings ¶
func (s *PgVectorStorage) FindSimilarEmbeddings(ctx context.Context, embedding *EmbeddingVector, limit int, threshold float32) ([]*EmbeddingVector, error)
FindSimilarEmbeddings finds embeddings similar to the provided one
func (*PgVectorStorage) GetEmbeddingsByContentIDs ¶
func (s *PgVectorStorage) GetEmbeddingsByContentIDs(ctx context.Context, contentIDs []string) ([]*EmbeddingVector, error)
GetEmbeddingsByContentIDs retrieves embeddings by content IDs
func (*PgVectorStorage) StoreEmbedding ¶
func (s *PgVectorStorage) StoreEmbedding(ctx context.Context, embedding *EmbeddingVector) error
StoreEmbedding stores a single embedding
type ProjectionMatrix ¶
type ProjectionMatrix struct {
ID int `json:"id" db:"id"`
FromDimensions int `json:"from_dimensions" db:"from_dimensions"`
ToDimensions int `json:"to_dimensions" db:"to_dimensions"`
FromProvider string `json:"from_provider" db:"from_provider"`
FromModel string `json:"from_model" db:"from_model"`
Matrix []float32 `json:"matrix" db:"matrix"`
QualityScore float64 `json:"quality_score" db:"quality_score"`
IsActive bool `json:"is_active" db:"is_active"`
}
ProjectionMatrix represents a dimension projection matrix
type Provider ¶
type Provider interface {
GenerateEmbedding(ctx context.Context, content string, model string) ([]float32, error)
GetSupportedModels() []string
ValidateAPIKey() error
}
Provider is the interface for embedding providers - TEMPORARY for legacy code cleanup
type ProviderCandidate ¶
ProviderCandidate represents a provider/model candidate
type ProviderCapability ¶
type ProviderCapability struct {
SupportsEmbeddings bool
EmbeddingModels []ModelInfo
DefaultModel string
}
ProviderCapability describes what a provider can do
type ProviderConfig ¶
type ProviderConfig struct {
// OpenAI configuration
OpenAIAPIKey string
// AWS/Bedrock configuration
AWSRegion string
// Google configuration
GoogleProjectID string
GoogleLocation string
GoogleAPIKey string
// Voyage AI configuration
VoyageAPIKey string
}
ProviderConfig contains configuration for creating providers
func NewProviderConfigFromEnv ¶
func NewProviderConfigFromEnv() *ProviderConfig
NewProviderConfigFromEnv creates provider config from environment variables
type ProviderHealth ¶
type ProviderLoad ¶
type QualityConfig ¶
type QualityConfig struct {
MinQualityScore float64
}
type QualityScore ¶
type QualityTracker ¶
type QualityTracker struct {
// contains filtered or unexported fields
}
QualityTracker tracks provider quality
func NewQualityTracker ¶
func NewQualityTracker(config QualityConfig) *QualityTracker
func (*QualityTracker) GetScore ¶
func (qt *QualityTracker) GetScore(provider, model string) float64
func (*QualityTracker) RecordFailure ¶
func (qt *QualityTracker) RecordFailure(provider string)
func (*QualityTracker) RecordSuccess ¶
func (qt *QualityTracker) RecordSuccess(provider string)
type RelationshipContextEnricher ¶
type RelationshipContextEnricher struct {
// contains filtered or unexported fields
}
RelationshipContextEnricher enhances embedding vectors with relationship context
func NewRelationshipContextEnricher ¶
func NewRelationshipContextEnricher(service relationship.Service) *RelationshipContextEnricher
NewRelationshipContextEnricher creates a new enricher for enhancing embeddings with relationship context
func (*RelationshipContextEnricher) EnrichEmbeddingMetadata ¶
func (e *RelationshipContextEnricher) EnrichEmbeddingMetadata( ctx context.Context, contentType string, contentID string, owner string, repo string, metadata map[string]interface{}, ) (map[string]interface{}, error)
EnrichEmbeddingMetadata adds relationship context to embedding metadata
func (*RelationshipContextEnricher) EnrichEmbeddingText ¶
func (e *RelationshipContextEnricher) EnrichEmbeddingText( ctx context.Context, contentType string, contentID string, owner string, repo string, originalText string, ) (string, error)
EnrichEmbeddingText adds relationship context to the text for embedding
func (*RelationshipContextEnricher) WithContextDepth ¶
func (e *RelationshipContextEnricher) WithContextDepth(depth int) *RelationshipContextEnricher
WithContextDepth sets the depth of relationships to include (1=direct, 2+=indirect)
func (*RelationshipContextEnricher) WithDirection ¶
func (e *RelationshipContextEnricher) WithDirection(direction string) *RelationshipContextEnricher
WithDirection sets the relationship direction to include
func (*RelationshipContextEnricher) WithMaxRelationships ¶
func (e *RelationshipContextEnricher) WithMaxRelationships(max int) *RelationshipContextEnricher
WithMaxRelationships sets the maximum number of relationships to include in context
type Repository ¶
type Repository struct {
// contains filtered or unexported fields
}
func NewRepository ¶
func NewRepository(db *sql.DB) *Repository
func NewRepositoryWithObservability ¶
func NewRepositoryWithObservability(db *sql.DB, logger observability.Logger, metrics observability.MetricsClient) *Repository
NewRepositoryWithObservability creates a repository with custom observability components
func (*Repository) GetAvailableModels ¶
func (r *Repository) GetAvailableModels(ctx context.Context, filter ModelFilter) ([]Model, error)
GetAvailableModels retrieves available embedding models
func (*Repository) GetEmbeddingsByContext ¶
func (r *Repository) GetEmbeddingsByContext(ctx context.Context, contextID, tenantID uuid.UUID) ([]Embedding, error)
GetEmbeddingsByContext retrieves all embeddings for a context
func (*Repository) GetModelByName ¶
GetModelByName retrieves a model by name
func (*Repository) InsertEmbedding ¶
func (r *Repository) InsertEmbedding(ctx context.Context, req InsertRequest) (uuid.UUID, error)
InsertEmbedding inserts a new embedding with automatic padding
func (*Repository) SearchEmbeddings ¶
func (r *Repository) SearchEmbeddings(ctx context.Context, req SearchRequest) ([]EmbeddingSearchResult, error)
SearchEmbeddings performs similarity search with optional metadata filtering
type RouterConfig ¶
type RouterConfig struct {
CircuitBreakerConfig CircuitBreakerConfig
LoadBalancerConfig LoadBalancerConfig
CostOptimizerConfig CostOptimizerConfig
QualityConfig QualityConfig
}
RouterConfig configures the smart router
func DefaultRouterConfig ¶
func DefaultRouterConfig() *RouterConfig
DefaultRouterConfig returns default router configuration
type RoutingDecision ¶
type RoutingDecision struct {
Candidates []ProviderCandidate
Strategy string
}
RoutingDecision represents the routing decision
type RoutingRequest ¶
type RoutingRequest struct {
AgentConfig *agents.AgentConfig
TaskType agents.TaskType
RequestID string
}
RoutingRequest represents a request for routing decision
type SearchFilter ¶
type SearchFilter struct {
// Field is the metadata field to filter on
Field string `json:"field"`
// Value is the value to match
Value interface{} `json:"value"`
// Operator is the comparison operator (eq, ne, gt, lt, gte, lte, in, contains)
Operator string `json:"operator"`
}
SearchFilter defines a filter for metadata fields
type SearchOptions ¶
type SearchOptions struct {
// ContentTypes filters results to specific content types
ContentTypes []string `json:"content_types,omitempty"`
// Filters are metadata filters to apply to the search
Filters []SearchFilter `json:"filters,omitempty"`
// Sorts defines the sort order for results
Sorts []SearchSort `json:"sorts,omitempty"`
// Limit is the maximum number of results to return
Limit int `json:"limit"`
// Offset is the number of results to skip (for pagination)
Offset int `json:"offset"`
// MinSimilarity is the minimum similarity score required (0.0 to 1.0)
MinSimilarity float32 `json:"min_similarity"`
// WeightFactors defines how to weight different scoring factors
WeightFactors map[string]float32 `json:"weight_factors,omitempty"`
}
SearchOptions contains options for search queries
type SearchRequest ¶
type SearchRequest struct {
QueryEmbedding []float32 `json:"query_embedding"`
ModelName string `json:"model_name"`
TenantID uuid.UUID `json:"tenant_id"`
ContextID *uuid.UUID `json:"context_id,omitempty"`
Limit int `json:"limit"`
Threshold float64 `json:"threshold"`
MetadataFilter json.RawMessage `json:"metadata_filter,omitempty"` // JSONB filter
}
SearchRequest represents a similarity search request
type SearchResult ¶
type SearchResult struct {
// Content is the embedding that matched
Content *EmbeddingVector `json:"content"`
// Score is the calculated relevance score (0.0 to 1.0)
Score float32 `json:"score"`
// Matches contains information about why this result matched
Matches map[string]interface{} `json:"matches,omitempty"`
}
SearchResult represents a single search result
type SearchResults ¶
type SearchResults struct {
// Results is the list of search results
Results []*SearchResult `json:"results"`
// Total is the total number of results found (for pagination)
Total int `json:"total"`
// HasMore indicates if there are more results available
HasMore bool `json:"has_more"`
}
SearchResults represents a collection of search results
type SearchService ¶
type SearchService interface {
// Search performs a vector search with the given text
Search(ctx context.Context, text string, options *SearchOptions) (*SearchResults, error)
// SearchByVector performs a vector search with a pre-computed vector
SearchByVector(ctx context.Context, vector []float32, options *SearchOptions) (*SearchResults, error)
// SearchByContentID performs a "more like this" search based on an existing content ID
SearchByContentID(ctx context.Context, contentID string, options *SearchOptions) (*SearchResults, error)
}
SearchService defines the interface for vector search operations
type SearchSort ¶
type SearchSort struct {
// Field is the field to sort on (can be "similarity" or any metadata field)
Field string `json:"field"`
// Direction is the sort direction ("asc" or "desc")
Direction string `json:"direction"`
}
SearchSort defines a sort order for results
type ServiceV2 ¶
type ServiceV2 struct {
// contains filtered or unexported fields
}
ServiceV2 is the enhanced embedding service with multi-agent support
func NewServiceV2 ¶
func NewServiceV2(config ServiceV2Config) (*ServiceV2, error)
NewServiceV2 creates a new enhanced embedding service
func (*ServiceV2) BatchGenerateEmbeddings ¶
func (s *ServiceV2) BatchGenerateEmbeddings(ctx context.Context, reqs []GenerateEmbeddingRequest) ([]*GenerateEmbeddingResponse, error)
BatchGenerateEmbeddings generates embeddings for multiple texts
func (*ServiceV2) GenerateBatch ¶
func (s *ServiceV2) GenerateBatch(ctx context.Context, texts []string, model string) ([][]float32, error)
GenerateBatch generates embeddings for multiple texts with progress tracking
func (*ServiceV2) GenerateEmbedding ¶
func (s *ServiceV2) GenerateEmbedding(ctx context.Context, req GenerateEmbeddingRequest) (*GenerateEmbeddingResponse, error)
GenerateEmbedding generates an embedding for the given request
func (*ServiceV2) GetProviderHealth ¶
func (s *ServiceV2) GetProviderHealth(ctx context.Context) map[string]ProviderHealth
GetProviderHealth returns health status of all providers
func (*ServiceV2) SetProgressCallback ¶
SetProgressCallback sets the progress callback function
type ServiceV2Config ¶
type ServiceV2Config struct {
Providers map[string]providers.Provider
AgentService AgentService
Repository *Repository
MetricsRepo MetricsRepository
Cache EmbeddingCache
RouterConfig *RouterConfig
}
ServiceV2Config contains configuration for the service
type SmartRouter ¶
type SmartRouter struct {
// contains filtered or unexported fields
}
SmartRouter handles intelligent routing between providers
func NewSmartRouter ¶
func NewSmartRouter(config *RouterConfig, providers map[string]providers.Provider) *SmartRouter
NewSmartRouter creates a new smart router
func (*SmartRouter) GetCircuitBreakerStatus ¶
func (r *SmartRouter) GetCircuitBreakerStatus(provider string) *CircuitBreakerStatus
GetCircuitBreakerStatus returns the status of a provider's circuit breaker
func (*SmartRouter) RecordResult ¶
func (r *SmartRouter) RecordResult(provider string, success bool, latency time.Duration)
RecordResult records the result of using a provider
func (*SmartRouter) SelectProvider ¶
func (r *SmartRouter) SelectProvider(ctx context.Context, req *RoutingRequest) (*RoutingDecision, error)
SelectProvider selects the best provider for the request
type UnifiedSearchConfig ¶
type UnifiedSearchConfig struct {
DB *sql.DB
Repository *Repository
SearchRepository repositorySearch.Repository
EmbeddingService EmbeddingService
DimensionAdapter *DimensionAdapter
Logger observability.Logger
Metrics observability.MetricsClient
}
UnifiedSearchConfig contains configuration for the unified search service
type UnifiedSearchService ¶
type UnifiedSearchService struct {
// contains filtered or unexported fields
}
UnifiedSearchService implements the SearchService interface with advanced features
func NewUnifiedSearchService ¶
func NewUnifiedSearchService(config *UnifiedSearchConfig) (*UnifiedSearchService, error)
NewUnifiedSearchService creates a new unified search service
func (*UnifiedSearchService) CrossModelSearch ¶
func (s *UnifiedSearchService) CrossModelSearch(ctx context.Context, req CrossModelSearchRequest) ([]CrossModelSearchResult, error)
CrossModelSearch performs search across embeddings from different models
func (*UnifiedSearchService) HybridSearch ¶
func (s *UnifiedSearchService) HybridSearch(ctx context.Context, req HybridSearchRequest) ([]HybridSearchResult, error)
HybridSearch performs hybrid search combining semantic and keyword search
func (*UnifiedSearchService) Search ¶
func (s *UnifiedSearchService) Search(ctx context.Context, text string, options *SearchOptions) (*SearchResults, error)
Search performs a vector search with the given text
func (*UnifiedSearchService) SearchByContentID ¶
func (s *UnifiedSearchService) SearchByContentID(ctx context.Context, contentID string, options *SearchOptions) (*SearchResults, error)
SearchByContentID performs a "more like this" search based on an existing content ID
func (*UnifiedSearchService) SearchByVector ¶
func (s *UnifiedSearchService) SearchByVector(ctx context.Context, vector []float32, options *SearchOptions) (*SearchResults, error)
SearchByVector performs a vector search with a pre-computed vector
type VoyageProvider ¶
type VoyageProvider struct {
// contains filtered or unexported fields
}
VoyageProvider implements the Provider interface for Voyage AI embeddings
func NewVoyageProvider ¶
func NewVoyageProvider(apiKey string) *VoyageProvider
NewVoyageProvider creates a new Voyage AI embedding provider
func (*VoyageProvider) GenerateEmbedding ¶
func (p *VoyageProvider) GenerateEmbedding(ctx context.Context, content string, model string) ([]float32, error)
GenerateEmbedding generates an embedding using Voyage AI API
func (*VoyageProvider) GetSupportedModels ¶
func (p *VoyageProvider) GetSupportedModels() []string
GetSupportedModels returns the list of supported Voyage AI models
func (*VoyageProvider) ValidateAPIKey ¶
func (p *VoyageProvider) ValidateAPIKey() error
ValidateAPIKey validates the Voyage AI API key
Source Files
¶
- anthropic.go
- bedrock.go
- circuit_breaker.go
- dimension_adapter.go
- factory.go
- github_adapter.go
- legacy_minimal_types.go
- models.go
- openai.go
- pipeline.go
- postgres.go
- provider_bedrock.go
- provider_factory.go
- provider_google.go
- provider_openai.go
- provider_selector.go
- provider_voyage.go
- relationship_context.go
- repository.go
- router.go
- search.go
- search_unified.go
- service_v2.go
- types.go