knowledge

package

Version: v0.0.1 (latest) | Published: Jun 22, 2026 | License: Apache-2.0 | Imports: 14 | Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/chirino/memory-service

Links

Open Source Insights

Documentation ¶

Index ¶

func ComputeCentroid(embeddings [][]float64, indices []int) []float64
func CosineDistance(a, b []float64) float64
func GenerateLabel(keywords []ScoredKeyword, maxTerms int) string
func KeywordStrings(keywords []ScoredKeyword) []string
func SortedClusterLabels(clusters map[int][]int) []int
type Cluster
type ClusterBirth
type ClusterMember
type ClusterResult
- func RunDBSCAN(embeddings [][]float64, cfg DBSCANConfig) ClusterResult
type ClusterRunStats
type ClusterTexts
type ClusterUpdate
type Clusterer
- func NewClusterer(store KnowledgeStore, decay time.Duration, keywordsCount int, cfg DBSCANConfig) *Clusterer
- func (c *Clusterer) ClusterByConversationGroups(ctx context.Context, groupIDs []uuid.UUID)
- func (c *Clusterer) Trigger(ctx context.Context) (ClusterRunStats, error)
type DBSCANConfig
- func DefaultDBSCANConfig() DBSCANConfig
type DiffResult
- func DiffClusters(newResult ClusterResult, embeddings [][]float64, sourceIDs []uuid.UUID, ...) DiffResult
type EmbeddingRecord
type KeywordResult
- func ExtractKeywords(clusterTexts ClusterTexts, topN int) []KeywordResult
type KnowledgeStore
type PostgresKnowledgeStore
- func NewPostgresKnowledgeStore(db *gorm.DB) *PostgresKnowledgeStore
- func OpenPostgresKnowledgeStore(dbURL string) (*PostgresKnowledgeStore, error)
- func (s *PostgresKnowledgeStore) DeleteCluster(ctx context.Context, clusterID uuid.UUID) error
- func (s *PostgresKnowledgeStore) ListUsersWithEmbeddings(ctx context.Context) ([]string, error)
- func (s *PostgresKnowledgeStore) LoadClustersForUser(ctx context.Context, userID string) ([]StoredCluster, error)
- func (s *PostgresKnowledgeStore) LoadEmbeddingsForUser(ctx context.Context, userID string) ([]EmbeddingRecord, error)
- func (s *PostgresKnowledgeStore) LoadTextsForSourceIDs(ctx context.Context, sourceIDs []uuid.UUID) (map[uuid.UUID]string, error)
- func (s *PostgresKnowledgeStore) ResolveOwnersByConversationGroupIDs(ctx context.Context, groupIDs []uuid.UUID) ([]string, error)
- func (s *PostgresKnowledgeStore) SaveCluster(ctx context.Context, cluster StoredCluster, members []StoredClusterMember) error
- func (s *PostgresKnowledgeStore) UpdateCluster(ctx context.Context, cluster StoredCluster, members []StoredClusterMember) error
type ScoredKeyword
type StoredCluster
type StoredClusterMember

Constants ¶

This section is empty.

Variables ¶

This section is empty.

Functions ¶

func ComputeCentroid ¶

func ComputeCentroid(embeddings [][]float64, indices []int) []float64

ComputeCentroid returns the mean vector of the given embeddings.

func CosineDistance ¶

func CosineDistance(a, b []float64) float64

CosineDistance computes 1 - cosine_similarity between two vectors. Returns 0.0 for identical directions, 2.0 for opposite directions.

func GenerateLabel ¶

func GenerateLabel(keywords []ScoredKeyword, maxTerms int) string

GenerateLabel creates a comma-separated label from the top keywords.

func KeywordStrings ¶

func KeywordStrings(keywords []ScoredKeyword) []string

KeywordStrings extracts just the term strings from scored keywords.

func SortedClusterLabels ¶

func SortedClusterLabels(clusters map[int][]int) []int

SortedClusterLabels returns cluster labels sorted by label number.

Types ¶

type Cluster ¶

type Cluster struct {
	ID          uuid.UUID
	UserID      string
	Label       string
	Keywords    []string
	Centroid    []float64
	MemberCount int
	Trend       int // 0=growing, 1=stable, 2=decaying
	SourceType  int // 0=entries, 1=memories, 2=mixed
	Members     []ClusterMember
}

Cluster represents a persisted knowledge cluster with metadata.

type ClusterBirth ¶

type ClusterBirth struct {
	Members  []int // indices in the new embedding set
	Centroid []float64
}

ClusterBirth represents a new cluster discovered by DBSCAN.

type ClusterMember ¶

type ClusterMember struct {
	SourceID   uuid.UUID
	SourceType int // 0=entry, 1=memory
}

ClusterMember identifies an embedding source.

type ClusterResult ¶

type ClusterResult struct {
	// Clusters maps cluster label (0-based) to the set of member indices.
	Clusters map[int][]int
	// Noise contains indices of points classified as noise by DBSCAN.
	Noise []int
}

ClusterResult represents the output of a single clustering run.

func RunDBSCAN ¶

func RunDBSCAN(embeddings [][]float64, cfg DBSCANConfig) ClusterResult

RunDBSCAN executes the DBSCAN algorithm on the given embeddings using cosine distance. Each embedding is a float64 slice. Returns a ClusterResult holding the cluster assignments (cluster label to member indices) and the indices classified as noise.

type ClusterRunStats ¶

type ClusterRunStats struct {
	UsersProcessed  int `json:"users_processed"`
	Clustersborn    int `json:"clusters_born"`
	ClustersUpdated int `json:"clusters_updated"`
	ClustersDied    int `json:"clusters_died"`
	Failures        int `json:"failures"`
}

ClusterRunStats summarizes a single clustering cycle.

type ClusterTexts ¶

type ClusterTexts map[int]string

ClusterTexts maps a cluster label (or index) to the combined text of its members.

type ClusterUpdate ¶

type ClusterUpdate struct {
	ClusterID    uuid.UUID
	NewMembers   []int // indices in the new embedding set
	NewCentroid  []float64
	OverlapRatio float64 // fraction of new cluster members that were in the old cluster
}

ClusterUpdate represents an existing cluster that matched a new DBSCAN cluster.

type Clusterer ¶

type Clusterer struct {
	// contains filtered or unexported fields
}

Clusterer runs DBSCAN clustering on user embeddings. It is triggered by the BackgroundIndexer after new embeddings are created, or manually via the admin trigger endpoint.

func (*Clusterer) ClusterByConversationGroups ¶

func (c *Clusterer) ClusterByConversationGroups(ctx context.Context, groupIDs []uuid.UUID)

ClusterByConversationGroups resolves the owner user IDs for the given conversation group IDs, then runs DBSCAN for each affected user. Called by the BackgroundIndexer after new embeddings are created.

func (*Clusterer) Trigger ¶

func (c *Clusterer) Trigger(ctx context.Context) (ClusterRunStats, error)

Trigger runs one full clustering cycle for all users with embeddings. Used by the admin trigger endpoint.

type DBSCANConfig ¶

type DBSCANConfig struct {
	// Epsilon is the maximum cosine distance between two points to be
	// considered neighbors. Range: 0.0 (identical) to 2.0 (opposite).
	Epsilon float64
	// MinPoints is the minimum number of points required to form a dense region.
	MinPoints int
}

DBSCANConfig holds parameters for the DBSCAN algorithm.

func DefaultDBSCANConfig ¶

func DefaultDBSCANConfig() DBSCANConfig

DefaultDBSCANConfig returns sensible defaults for clustering embeddings.

type DiffResult ¶

type DiffResult struct {
	// Updated clusters: existing cluster matched to new DBSCAN output.
	Updated []ClusterUpdate
	// Born clusters: new DBSCAN clusters that don't match any existing cluster.
	Born []ClusterBirth
	// Died clusters: existing clusters with no matching new DBSCAN cluster.
	Died []uuid.UUID
}

DiffResult describes the changes between previous clusters and a new DBSCAN run.

func DiffClusters ¶

func DiffClusters(
	newResult ClusterResult,
	embeddings [][]float64,
	sourceIDs []uuid.UUID,
	existingClusters []Cluster,
) DiffResult

DiffClusters compares a new DBSCAN ClusterResult against existing clusters and determines which clusters are updated, born, or died.

existingClusters are the previously stored clusters; each one's Members field lists the source IDs that belonged to it. sourceIDs is the ordered list of source IDs corresponding to the embeddings passed to DBSCAN (i.e., sourceIDs[i] is the source of embeddings[i]). embeddings are the vectors used in the current DBSCAN run.

Matching uses majority-member overlap: a new DBSCAN cluster is matched to the existing cluster that shares the highest fraction of members, provided that fraction exceeds 0.5 (majority). Each existing cluster is matched at most once (best match wins).

type EmbeddingRecord ¶

type EmbeddingRecord struct {
	SourceID   uuid.UUID // entry_id or memory_id
	SourceType int       // 0=entry, 1=memory
	UserID     string    // owner of the data
	Embedding  []float64
}

EmbeddingRecord represents a stored embedding that the clustering goroutine reads.

type KeywordResult ¶

type KeywordResult struct {
	ClusterLabel int
	Keywords     []ScoredKeyword
}

KeywordResult holds the top keywords for a single cluster.

func ExtractKeywords ¶

func ExtractKeywords(clusterTexts ClusterTexts, topN int) []KeywordResult

ExtractKeywords computes c-TF-IDF across all clusters and returns the top-N keywords per cluster. c-TF-IDF treats each cluster's combined text as a single document and identifies terms that are distinctively frequent in one cluster compared to all others.

topN controls how many keywords are returned per cluster.

type KnowledgeStore ¶

type KnowledgeStore interface {
	// ListUsersWithEmbeddings returns distinct user IDs that have embeddings.
	ListUsersWithEmbeddings(ctx context.Context) ([]string, error)

	// LoadEmbeddingsForUser returns all embeddings belonging to a user.
	LoadEmbeddingsForUser(ctx context.Context, userID string) ([]EmbeddingRecord, error)

	// LoadClustersForUser returns all stored clusters (with members) for a user.
	LoadClustersForUser(ctx context.Context, userID string) ([]StoredCluster, error)

	// SaveCluster creates a new cluster with its members.
	SaveCluster(ctx context.Context, cluster StoredCluster, members []StoredClusterMember) error

	// UpdateCluster updates an existing cluster's metadata and replaces its members.
	UpdateCluster(ctx context.Context, cluster StoredCluster, members []StoredClusterMember) error

	// DeleteCluster removes a cluster and its members.
	DeleteCluster(ctx context.Context, clusterID uuid.UUID) error

	// LoadTextsForSourceIDs returns the indexed text content for the given source IDs.
	// Used for c-TF-IDF keyword extraction after clustering.
	LoadTextsForSourceIDs(ctx context.Context, sourceIDs []uuid.UUID) (map[uuid.UUID]string, error)

	// ResolveOwnersByConversationGroupIDs returns distinct owner user IDs for the
	// given conversation group IDs. Used by the indexer to determine which users
	// need re-clustering after new embeddings are created.
	ResolveOwnersByConversationGroupIDs(ctx context.Context, groupIDs []uuid.UUID) ([]string, error)
}

KnowledgeStore defines the persistence interface for the clustering goroutine.

type PostgresKnowledgeStore ¶

type PostgresKnowledgeStore struct {
	// contains filtered or unexported fields
}

PostgresKnowledgeStore implements KnowledgeStore using GORM + PostgreSQL.

func NewPostgresKnowledgeStore ¶

func NewPostgresKnowledgeStore(db *gorm.DB) *PostgresKnowledgeStore

NewPostgresKnowledgeStore creates a new PostgreSQL-backed knowledge store.

func OpenPostgresKnowledgeStore ¶

func OpenPostgresKnowledgeStore(dbURL string) (*PostgresKnowledgeStore, error)

OpenPostgresKnowledgeStore opens a new gorm.DB connection for the knowledge store.

func (*PostgresKnowledgeStore) DeleteCluster ¶

func (s *PostgresKnowledgeStore) DeleteCluster(ctx context.Context, clusterID uuid.UUID) error

func (*PostgresKnowledgeStore) ListUsersWithEmbeddings ¶

func (s *PostgresKnowledgeStore) ListUsersWithEmbeddings(ctx context.Context) ([]string, error)

func (*PostgresKnowledgeStore) LoadClustersForUser ¶

func (s *PostgresKnowledgeStore) LoadClustersForUser(ctx context.Context, userID string) ([]StoredCluster, error)

func (*PostgresKnowledgeStore) LoadEmbeddingsForUser ¶

func (s *PostgresKnowledgeStore) LoadEmbeddingsForUser(ctx context.Context, userID string) ([]EmbeddingRecord, error)

func (*PostgresKnowledgeStore) LoadTextsForSourceIDs ¶

func (s *PostgresKnowledgeStore) LoadTextsForSourceIDs(ctx context.Context, sourceIDs []uuid.UUID) (map[uuid.UUID]string, error)

func (*PostgresKnowledgeStore) ResolveOwnersByConversationGroupIDs ¶

func (s *PostgresKnowledgeStore) ResolveOwnersByConversationGroupIDs(ctx context.Context, groupIDs []uuid.UUID) ([]string, error)

func (*PostgresKnowledgeStore) SaveCluster ¶

func (s *PostgresKnowledgeStore) SaveCluster(ctx context.Context, cluster StoredCluster, members []StoredClusterMember) error

func (*PostgresKnowledgeStore) UpdateCluster ¶

func (s *PostgresKnowledgeStore) UpdateCluster(ctx context.Context, cluster StoredCluster, members []StoredClusterMember) error

type ScoredKeyword ¶

type ScoredKeyword struct {
	Term  string
	Score float64
}

ScoredKeyword is a term with its c-TF-IDF score.

type StoredCluster ¶

type StoredCluster struct {
	ID          uuid.UUID
	UserID      string
	Label       string
	Keywords    []string
	Centroid    []float64
	MemberCount int
	Trend       int // 0=growing, 1=stable, 2=decaying
	SourceType  int // 0=entries, 1=memories, 2=mixed
	CreatedAt   time.Time
	UpdatedAt   time.Time
	Members     []StoredClusterMember
}

StoredCluster is a cluster as persisted in the database.

type StoredClusterMember ¶

type StoredClusterMember struct {
	ClusterID  uuid.UUID
	SourceID   uuid.UUID
	SourceType int
	Distance   float32
}

StoredClusterMember is a cluster membership row.

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL