store

package
v0.2.0 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 18, 2026 | License: Apache-2.0 | Imports: 15 | Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type Chunk

// Chunk represents a row in the chunks table.
//
// ParentChunkID is optional: it is a pointer tagged omitempty, so a nil
// parent is left out of the JSON encoding. According to the InsertChunks
// documentation, the chunker assigns temporary position-based IDs and
// InsertChunks remaps ParentChunkID to the real database IDs on insert.
//
// Metadata is tagged omitempty and is dropped from JSON when it is the
// empty string.
// NOTE(review): Metadata is a plain string; presumably it carries
// serialized JSON — confirm against the code that writes it.
type Chunk struct {
	ID            int64  `json:"id"`
	DocumentID    int64  `json:"document_id"`
	ParentChunkID *int64 `json:"parent_chunk_id,omitempty"`
	Content       string `json:"content"`
	ChunkType     string `json:"chunk_type"`
	Heading       string `json:"heading"`
	PageNumber    int    `json:"page_number"`
	PositionInDoc int    `json:"position_in_doc"`
	TokenCount    int    `json:"token_count"`
	Metadata      string `json:"metadata,omitempty"`
	ContentHash   string `json:"content_hash"`
}

Chunk represents a row in the chunks table.

type ChunkImage added in v0.2.0

// ChunkImage represents an image associated with a chunk.
//
// Caption and Data are tagged omitempty and are dropped from the JSON
// encoding when empty. Data is a byte slice, which encoding/json emits as
// a base64-encoded string.
//
// GetImagesByChunkIDs leaves Data empty when called with includeData set
// to false (to avoid loading BLOBs), so an empty Data on a value obtained
// that way does not imply that no image bytes are stored.
type ChunkImage struct {
	ID         int64  `json:"id"`
	ChunkID    int64  `json:"chunk_id"`
	DocumentID int64  `json:"document_id"`
	Caption    string `json:"caption,omitempty"`
	MIMEType   string `json:"mime_type"`
	Width      int    `json:"width"`
	Height     int    `json:"height"`
	PageNumber int    `json:"page_number"`
	Data       []byte `json:"data,omitempty"`
}

ChunkImage represents an image associated with a chunk.

type ChunkMatch

// ChunkMatch holds the result of a content substring search, as returned
// by SearchChunksByContent. It identifies the matching chunk by ID and
// carries that chunk's heading and page number; the chunk content itself
// is not included.
type ChunkMatch struct {
	ChunkID    int64  `json:"chunk_id"`
	Heading    string `json:"heading"`
	PageNumber int    `json:"page_number"`
}

ChunkMatch holds the result of a content substring search.

type Community

// Community represents a row in the communities table.
//
// EntityIDs is a string holding a JSON array rather than a Go slice, so
// when a Community is marshalled the array appears as a quoted JSON string
// under "entity_ids", not as a nested array.
// NOTE(review): the element type of that array is not visible here;
// presumably entity IDs as integers — confirm against the writer.
type Community struct {
	ID        int64  `json:"id"`
	Level     int    `json:"level"`
	Summary   string `json:"summary"`
	EntityIDs string `json:"entity_ids"` // JSON array
}

Community represents a row in the communities table.

type DBStats

// DBStats holds counts of key database objects: chunks, embeddings,
// entities, relationships, communities, and documents. It is the result
// type of (*Store).DBStats.
type DBStats struct {
	Chunks        int `json:"chunks"`
	Embeddings    int `json:"embeddings"`
	Entities      int `json:"entities"`
	Relationships int `json:"relationships"`
	Communities   int `json:"communities"`
	Documents     int `json:"documents"`
}

DBStats holds counts of key database objects.

type Document

// Document represents a row in the documents table.
//
// Metadata is tagged omitempty and is dropped from the JSON encoding when
// it is the empty string.
//
// CreatedAt and UpdatedAt are carried as strings, not time.Time.
// NOTE(review): the timestamp format and time zone are not visible here —
// confirm against the schema before parsing them.
type Document struct {
	ID          int64  `json:"id"`
	Path        string `json:"path"`
	Filename    string `json:"filename"`
	Format      string `json:"format"`
	ContentHash string `json:"content_hash"`
	ParseMethod string `json:"parse_method"`
	Status      string `json:"status"`
	Metadata    string `json:"metadata,omitempty"`
	CreatedAt   string `json:"created_at"`
	UpdatedAt   string `json:"updated_at"`
}

Document represents a row in the documents table.

type Entity

// Entity represents a row in the entities table.
//
// NameEN maps to the name_en column, which the SearchEntitiesByNameEN
// documentation describes as the English canonical name used for
// cross-language entity matching.
//
// EmbeddingID is optional: it is a pointer tagged omitempty, so a nil
// value is left out of the JSON encoding. Metadata is likewise omitted
// from JSON when it is the empty string.
type Entity struct {
	ID          int64  `json:"id"`
	Name        string `json:"name"`
	NameEN      string `json:"name_en"`
	EntityType  string `json:"entity_type"`
	Description string `json:"description"`
	EmbeddingID *int64 `json:"embedding_id,omitempty"`
	Metadata    string `json:"metadata,omitempty"`
}

Entity represents a row in the entities table.

type QueryLog

// QueryLog represents a row in the query_log table and is the argument
// type of (*Store).LogQuery.
//
// Sources is an empty interface, so its JSON form depends on the dynamic
// value the caller stores in it; a nil Sources encodes as JSON null.
// NOTE(review): how LogQuery persists Sources is not visible here —
// presumably it is serialized to JSON; confirm before storing values that
// encoding/json cannot marshal.
//
// The struct has no ID or timestamp field.
// NOTE(review): presumably those are assigned by the database — confirm.
type QueryLog struct {
	Query            string      `json:"query"`
	Answer           string      `json:"answer"`
	Confidence       float64     `json:"confidence"`
	Sources          interface{} `json:"sources"`
	RetrievalMethod  string      `json:"retrieval_method"`
	ModelUsed        string      `json:"model_used"`
	Rounds           int         `json:"rounds"`
	PromptTokens     int         `json:"prompt_tokens"`
	CompletionTokens int         `json:"completion_tokens"`
	TotalTokens      int         `json:"total_tokens"`
}

QueryLog represents a row in the query_log table.

type Relationship

// Relationship represents a row in the relationships table. It connects
// two entities, identified by SourceEntityID and TargetEntityID.
//
// SourceChunkID is optional: it is a pointer tagged omitempty, so a nil
// value is left out of the JSON encoding. Metadata is likewise omitted
// from JSON when it is the empty string.
// NOTE(review): the range and meaning of Weight are not visible here —
// confirm against the code that produces relationships.
type Relationship struct {
	ID             int64   `json:"id"`
	SourceEntityID int64   `json:"source_entity_id"`
	TargetEntityID int64   `json:"target_entity_id"`
	RelationType   string  `json:"relation_type"`
	Weight         float64 `json:"weight"`
	Description    string  `json:"description"`
	SourceChunkID  *int64  `json:"source_chunk_id,omitempty"`
	Metadata       string  `json:"metadata,omitempty"`
}

Relationship represents a row in the relationships table.

type RetrievalResult

// RetrievalResult holds a chunk with its retrieval score and document
// info. It is the element type returned by FTSSearch, GraphSearch, and
// VectorSearch.
//
// ChunkMeta and DocMeta are encoded under the JSON keys "chunk_metadata"
// and "doc_metadata" and are omitted when empty.
// NOTE(review): the scale and direction of Score (higher-is-better vs.
// distance) are not visible here and may differ between search methods —
// confirm before comparing scores across them.
type RetrievalResult struct {
	ChunkID       int64   `json:"chunk_id"`
	DocumentID    int64   `json:"document_id"`
	Content       string  `json:"content"`
	Heading       string  `json:"heading"`
	ChunkType     string  `json:"chunk_type"`
	PageNumber    int     `json:"page_number"`
	PositionInDoc int     `json:"position_in_doc"`
	Filename      string  `json:"filename"`
	Path          string  `json:"path"`
	Score         float64 `json:"score"`
	ChunkMeta     string  `json:"chunk_metadata,omitempty"`
	DocMeta       string  `json:"doc_metadata,omitempty"`
}

RetrievalResult holds a chunk with its retrieval score and document info.

type Store

// Store wraps the SQLite database for all goreason persistence.
//
// All of its fields are unexported; obtain a Store from New and release it
// with Close. The underlying *sql.DB is reachable through the DB method.
type Store struct {
	// contains filtered or unexported fields
}

Store wraps the SQLite database for all goreason persistence.

func New

func New(dbPath string, embeddingDim int) (*Store, error)

New opens (or creates) a SQLite database at the given path and initialises the schema including sqlite-vec and FTS5 virtual tables.

func (*Store) AllEntities

func (s *Store) AllEntities(ctx context.Context) ([]Entity, error)

AllEntities returns every entity in the database.

func (*Store) AllRelationships

func (s *Store) AllRelationships(ctx context.Context) ([]Relationship, error)

AllRelationships returns every relationship in the database.

func (*Store) ChunkHasEmbedding

func (s *Store) ChunkHasEmbedding(ctx context.Context, chunkID int64) (bool, error)

ChunkHasEmbedding checks if a specific chunk has a vector embedding.

func (*Store) ClearCommunities

func (s *Store) ClearCommunities(ctx context.Context) error

ClearCommunities removes all community data.

func (*Store) Close

func (s *Store) Close() error

Close closes the underlying database connection.

func (*Store) DB

func (s *Store) DB() *sql.DB

DB returns the underlying *sql.DB for advanced queries.

func (*Store) DBStats

func (s *Store) DBStats(ctx context.Context) (*DBStats, error)

DBStats returns counts of chunks, embeddings, entities, relationships, communities, and documents.

func (*Store) DeleteDocument

func (s *Store) DeleteDocument(ctx context.Context, id int64) error

DeleteDocument removes a document and cascades to all related data.

func (*Store) DeleteDocumentData

func (s *Store) DeleteDocumentData(ctx context.Context, docID int64) error

DeleteDocumentData removes all chunks, embeddings, images, and entity data for a document but keeps the document record itself.

func (*Store) EmbeddingDim

func (s *Store) EmbeddingDim() int

EmbeddingDim returns the configured embedding dimension.

func (*Store) FTSSearch

func (s *Store) FTSSearch(ctx context.Context, query string, limit int) ([]RetrievalResult, error)

FTSSearch performs a full-text search using FTS5 BM25 ranking.

func (*Store) GetChunksByDocument

func (s *Store) GetChunksByDocument(ctx context.Context, docID int64) ([]Chunk, error)

GetChunksByDocument returns all chunks for a given document.

func (*Store) GetCommunities

func (s *Store) GetCommunities(ctx context.Context, level int) ([]Community, error)

GetCommunities returns all communities at a given level.

func (*Store) GetCorpusLanguages

func (s *Store) GetCorpusLanguages(ctx context.Context) ([]string, error)

GetCorpusLanguages returns the distinct non-null languages across all documents.

func (*Store) GetDocument

func (s *Store) GetDocument(ctx context.Context, id int64) (*Document, error)

GetDocument retrieves a document by ID.

func (*Store) GetDocumentByPath

func (s *Store) GetDocumentByPath(ctx context.Context, path string) (*Document, error)

GetDocumentByPath retrieves a document by its file path.

func (*Store) GetEntitiesByNames

func (s *Store) GetEntitiesByNames(ctx context.Context, names []string) ([]Entity, error)

GetEntitiesByNames returns entities matching any of the given names.

func (*Store) GetImagesByChunkIDs added in v0.2.0

func (s *Store) GetImagesByChunkIDs(ctx context.Context, chunkIDs []int64, includeData bool) (map[int64][]ChunkImage, error)

GetImagesByChunkIDs returns images grouped by chunk ID. When includeData is false, the Data field is left empty to avoid loading BLOBs.

func (*Store) GetRelatedEntities

func (s *Store) GetRelatedEntities(ctx context.Context, entityIDs []int64, limit int) ([]Entity, error)

GetRelatedEntities performs a 1-hop expansion from the given seed entity IDs via the relationships table, returning entities that are directly connected but not already in the seed set. Used by synthesis-mode retrieval to discover semantically distant entities (e.g., from "seguridad y normativa" → "ip54").

func (*Store) GraphSearch

func (s *Store) GraphSearch(ctx context.Context, entityIDs []int64, limit int) ([]RetrievalResult, error)

GraphSearch finds chunks reachable via entity relationships.

func (*Store) InsertChunkImages added in v0.2.0

func (s *Store) InsertChunkImages(ctx context.Context, images []ChunkImage) error

InsertChunkImages batch-inserts images associated with chunks.

func (*Store) InsertChunks

func (s *Store) InsertChunks(ctx context.Context, chunks []Chunk) ([]int64, error)

InsertChunks inserts a batch of chunks and returns their IDs. The chunker assigns temporary position-based IDs; this method remaps ParentChunkID values to the real database IDs as chunks are inserted.

func (*Store) InsertCommunity

func (s *Store) InsertCommunity(ctx context.Context, c Community) (int64, error)

InsertCommunity stores a community detection result.

func (*Store) InsertEmbedding

func (s *Store) InsertEmbedding(ctx context.Context, chunkID int64, embedding []float32) error

InsertEmbedding stores a vector embedding for a chunk.

func (*Store) InsertRelationship

func (s *Store) InsertRelationship(ctx context.Context, r Relationship) (int64, error)

InsertRelationship creates a relationship between two entities.

func (*Store) LinkEntityChunk

func (s *Store) LinkEntityChunk(ctx context.Context, entityID, chunkID int64) error

LinkEntityChunk creates a provenance link between an entity and a chunk.

func (*Store) ListDocuments

func (s *Store) ListDocuments(ctx context.Context) ([]Document, error)

ListDocuments returns all documents ordered by creation time.

func (*Store) LogQuery

func (s *Store) LogQuery(ctx context.Context, q QueryLog) error

LogQuery writes an entry to the query audit log.

func (*Store) Migrate

func (s *Store) Migrate(ctx context.Context) error

Migrate runs all pending schema migrations.

func (*Store) SampleChunks

func (s *Store) SampleChunks(ctx context.Context, n int) ([]Chunk, error)

SampleChunks returns up to n chunks sampled from the database. Used for language detection and other heuristics.

func (*Store) SearchChunksByContent

func (s *Store) SearchChunksByContent(ctx context.Context, substring string) ([]ChunkMatch, error)

SearchChunksByContent searches all chunks for a case-insensitive substring match.

func (*Store) SearchEntitiesByNameEN

func (s *Store) SearchEntitiesByNameEN(ctx context.Context, terms []string, limit int) ([]Entity, error)

SearchEntitiesByNameEN finds entities whose English canonical name contains any of the given terms as substrings. Same pattern as SearchEntitiesByTerms but operates on the name_en column for cross-language entity matching.

func (*Store) SearchEntitiesByTerms

func (s *Store) SearchEntitiesByTerms(ctx context.Context, terms []string, limit int) ([]Entity, error)

SearchEntitiesByTerms finds entities whose names contain any of the given terms as substrings. This enables graph search to work when query terms are single words (e.g. "rejected") and entity names are multi-word phrases (e.g. "rechazador de envases"). Results are limited to avoid noise from very short or generic terms.

func (*Store) UpdateDocumentLanguage

func (s *Store) UpdateDocumentLanguage(ctx context.Context, docID int64, language string) error

UpdateDocumentLanguage sets the detected language for a document.

func (*Store) UpdateDocumentParseMethod

func (s *Store) UpdateDocumentParseMethod(ctx context.Context, id int64, method string) error

UpdateDocumentParseMethod updates just the parse_method field.

func (*Store) UpdateDocumentStatus

func (s *Store) UpdateDocumentStatus(ctx context.Context, id int64, status string) error

UpdateDocumentStatus updates just the status field.

func (*Store) UpsertDocument

func (s *Store) UpsertDocument(ctx context.Context, doc Document) (int64, error)

UpsertDocument inserts or updates a document record. Returns the document ID.

func (*Store) UpsertEntity

func (s *Store) UpsertEntity(ctx context.Context, e Entity) (int64, error)

UpsertEntity inserts or updates an entity. Returns the entity ID.

func (*Store) UpsertEntityAndLink

func (s *Store) UpsertEntityAndLink(ctx context.Context, e Entity, chunkID int64) (int64, error)

UpsertEntityAndLink atomically upserts an entity and links it to a chunk in a single transaction, preventing FOREIGN KEY failures from concurrent access.

func (*Store) VectorSearch

func (s *Store) VectorSearch(ctx context.Context, queryEmbedding []float32, k int) ([]RetrievalResult, error)

VectorSearch performs a KNN search returning the top-k nearest chunks.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL